tsearch2 module

author Teodor Sigaev

Mon, 21 Jul 2003 10:27:44 +0000 (10:27 +0000)

committer Teodor Sigaev

Mon, 21 Jul 2003 10:27:44 +0000 (10:27 +0000)
author Teodor Sigaev
Mon, 21 Jul 2003 10:27:44 +0000 (10:27 +0000)
committer Teodor Sigaev
Mon, 21 Jul 2003 10:27:44 +0000 (10:27 +0000)
diff --git a/contrib/tsearch2/Makefile b/contrib/tsearch2/Makefile

new file mode 100644 (file)

index 0000000..a58370e
--- /dev/null
+++ b/contrib/tsearch2/Makefile
@@ -0,0 +1,44 @@
+subdir = contrib/tsearch2
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+# Search this directory and the snowball, ispell and wordparser sub-directories for headers, ahead of any existing CPPFLAGS.
+override CPPFLAGS := -I. -I./snowball -I./ispell -I./wordparser $(CPPFLAGS)
+# Module name and its object files (presumably consumed by contrib-global.mk, included further down -- confirm there).
+MODULE_big = tsearch2
+OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
+       snowball/english_stem.o snowball/api.o snowball/russian_stem.o snowball/utilities.o \
+       dict_snowball.o ispell/spell.o dict_ispell.o dict_syn.o \
+       wparser.o wordparser/parser.o wordparser/deflex.o wparser_def.o \
+       ts_cfg.o tsvector.o rewrite.o crc32.o query.o gistidx.o \
+       tsvector_op.o rank.o ts_stat.o
+# Built SQL scripts, documentation file and regression test name (semantics come from the included build rules -- confirm there).
+DATA_built = tsearch2.sql untsearch2.sql
+DOCS = README.tsearch2
+REGRESS = tsearch2
+# Generate the word parser's C source from its flex specification when FLEX is set; otherwise run the $(missing) helper instead.
+wordparser/parser.c: wordparser/parser.l
+ifdef FLEX
+   $(FLEX) $(FLEXFLAGS) -8 -Ptsearch2_yy -o'$@' $<
+else
+   @$(missing) flex $< $@
+endif
+# Generated files listed for removal on clean (presumably honoured by contrib-global.mk -- confirm there).
+EXTRA_CLEAN = wordparser/parser.c tsearch2.sql.in
+# Link the module against the math library.
+SHLIB_LINK := -lm
+include $(top_srcdir)/contrib/contrib-global.mk
+# DO NOT DELETE
+# Copy the stop-word lists into the data directory on install; DESTDIR is honoured for staged installs and expands to nothing when unset.
+.PHONY: installstop
+install: installstop
+installstop:
+	cp stopword/*.stop $(DESTDIR)$(datadir)
+# Produce the SQL template by replacing every DATA_PATH placeholder with the data directory.
+# The prerequisite is tsearch.sql.in, named like untsearch.sql.in in the rule for untsearch2.sql.
+tsearch2.sql.in: tsearch.sql.in
+	sed 's,DATA_PATH,$(datadir),g' < $< > $@
+# The uninstall script needs no substitution: it is a plain copy of its .in file.
+untsearch2.sql: untsearch.sql.in
+   cp $< $@ 
+
diff --git a/contrib/tsearch2/README.tsearch2 b/contrib/tsearch2/README.tsearch2

new file mode 100644 (file)

index 0000000..801e3fe
--- /dev/null
+++ b/contrib/tsearch2/README.tsearch2
@@ -0,0 +1,199 @@
+Tsearch2 - full text search extension for PostgreSQL
+
+   [10][Online version] of this document is available
+   
+   This module is sponsored by Delta-Soft Ltd., Moscow, Russia.
+   
+   Notice: This version is fully incompatible with the old tsearch (V1),
+   which is considered deprecated in the upcoming 7.4 release and
+   will be obsolete in 7.5.
+   
+   The Tsearch2 contrib module contains an implementation of a new data
+   type tsvector - a searchable data type with indexed access. In a
+   nutshell, tsvector is a set of unique words along with their
+   positional information in the document, organized in a special
+   structure optimized for fast access and lookup. Actually, each word
+   entry, besides its position in the document, could have a weight
+   attribute, describing the importance of this word (at a specific position)
+   in the document. A set of bit-signatures of a fixed length, representing
+   tsvectors, are stored in a search tree (developed using PostgreSQL
+   GiST), which provides online update of full text index and fast query
+   lookup. The module provides indexed access methods, queries,
+   operations and supporting routines for the tsvector data type and easy
+   conversion of text data to tsvector. Table driven configuration allows
+   creation of custom configuration optimized for specific searches using
+   standard SQL commands.
+   
+   Configuration allows you to:
+     * specify the type of lexemes to be indexed and the way they are
+       processed.
+     * specify dictionaries to be used along with stop words recognition.
+     * specify the parser used to process a document.
+       
+   See [11]Documentation Roadmap for links to documentation.
+   
+Authors
+
+     * Oleg Bartunov , Moscow, Moscow University, Russia
+     * Teodor Sigaev , Moscow, Delta-Soft Ltd.,Russia
+       
+Contributors
+
+     * Robert John Shepherd and Andrew J. Kopciuch submitted
+       "Introduction to tsearch" (Robert - tsearch v1, Andrew - tsearch
+       v2)
+     * Brandon Craig Rhodes wrote "Tsearch2 Guide" and "Tsearch2
+       Reference" and proposed new naming convention for tsearch V2
+       
+New features
+
+     * Relevance ranking of search results
+     * Table driven configuration
+     * Morphology support (ispell dictionaries, snowball stemmers)
+     * Headline support (text fragments with highlighted search terms)
+     * Ability to plug-in custom dictionaries and parsers
+     * Synonym dictionary
+     * Generator of templates for dictionaries (built-in snowball stemmer
+       support)
+     * Statistics of indexed words is available
+       
+Limitations
+
+     * A lexeme must be no longer than 2048 bytes
+     * The number of lexemes is limited to 2^32. Note that the actual
+       capacity of a tsvector depends on whether positional information
+       is stored or not.
+     * tsvector - the size is limited by approximately 2^20 bytes.
+     * tsquery - the number of entries (lexemes and operations) < 32768
+     * Positional information
+          + maximal position of lexeme < 2^14 (16384)
+          + lexeme could have maximum 256 positions
+       
+References
+
+     * GiST development site -
+       [12]http://www.sai.msu.su/~megera/postgres/gist
+     * OpenFTS home page - [13]http://openfts.sourceforge.net/
+     * Mailing list -
+       [14]http://sourceforge.net/mailarchive/forum.php?forum=openfts-gen
+       eral
+       
+   [15]Documentation Roadmap
+   
+Documentation Roadmap
+
+     * Several docs are available from docs/ subdirectory
+          + "Tsearch V2 Introduction" by Andrew Kopciuch
+          + "Tsearch2 Guide" by Brandon Rhodes
+          + "Tsearch2 Reference" by Brandon Rhodes
+     * Readme.gendict in gendict/ subdirectory
+          + [16][Gendict tutorial]
+       
+   Online version of documentation is always available from Tsearch V2
+   home page -
+   [17]http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/
+   
+Support
+
+   The authors strongly recommend using the [18][openfts-general] or
+   [19][pgsql-general] mailing lists for questions and discussions.
+   
+Caution
+
+   Although full text searching with our tsearch module may appear easy
+   (the authors hope it is), any serious search engine requires profound
+   study of various aspects, such as stop words, dictionaries and special
+   parsers. The tsearch module was designed to facilitate both those cases.
+   
+Development History
+
+   Pre-tsearch era
+          Development of OpenFTS began in 2000 after realizing that we
+          needed a search engine optimized for online updates and able to
+          access metadata from the database. This is essential for online
+          news agencies, web portals, digital libraries, etc. Most search
+          engines available utilize an inverted index which is very fast
+          for searching but very slow for online updates. Incremental
+          updates of an inverted index is a complex engineering task
+          while we needed something light, free and with the ability to
+          access metadata from the database. The last requirement is very
+          important because in a real life application a search engine
+          should always consult metadata ( topic, permissions, date
+          range, version, etc.). We extensively use PostgreSQL as a
+          database backend and have no intention to move from it, so the
+          problem was to find a data structure and a fast way to access
+          it. PostgreSQL has rather unique data type for storing sets
+          (think about words) - arrays, but lacks index access to them. A
+          document is parsed into lexemes, which are identified in
+          various ways (e.g. stemming, morphology, dictionary), and as a
+          result is reduced to an array of integer numbers. During our
+          research we found a paper of Joseph Hellerstein which
+          introduced an interesting data structure suitable for sets -
+          RD-tree (Russian Doll tree). It looked very attractive, but
+          implementing it in PostgreSQL seemed difficult because of our
+          ignorance of database internals. Further research led us to
+          the idea to use GiST for implementing RD-tree, but at that time
+          the GiST code had for a long while remained untouched and
+          contained several bugs. After work on improving GiST for
+          version 7.0.3 of PostgreSQL was done, we were able to implement
+          RD-Tree and use it for index access to arrays of integers. This
+          implementation was ideally suited for small arrays and
+          eliminated complex joins, but was practically useless for
+          indexing large arrays. The next improvement came from an idea
+          to represent a document by a single bit-signature, a so-called
+          superimposed signature (see "Index Structures for Databases
+          Containing Data Items with Set-valued Attributes", 1997, Sven
+          Helmer for details). We developed the contrib/intarray module
+          and used it for full text indexing.
+          
+   tsearch v1
+          It was inconvenient to use integer id's instead of words, so we
+          introduced a new data type called 'txtidx' - a searchable data
+          type (textual) with indexed access. This was a first step of
+          our work on an implementation of a built-in PostgreSQL full
+          text search engine. Even though tsearch v1 had many features of
+          a search engine it lacked configuration support and relevance
+          ranking. People were encouraged to use OpenFTS, which provided
+          relevance ranking based on coordinate information and flexible
+          configuration. OpenFTS v.0.34 is the last version based on
+          tsearch v1.
+          
+   tsearch V2
+          People recognized tsearch as a powerful tool for full text
+          searching and insisted on adding ranking support, better
+          configurability, etc. We already thought about moving most of
+          the features of OpenFTS to tsearch, and in the early 2003 we
+          decided to work on a new version of tsearch - tsearch v2. We've
+          abandoned auxiliary index tables which were used by OpenFTS to
+          store coordinate information and modified the txtidx type to
+          store them internally. Also, we've added table-driven
+          configuration, support of ispell dictionaries, snowball
+          stemmers and the ability to specify which types of lexemes to
+          index. Also, it's now possible to generate headlines of
+          documents with highlighted search terms. These changes make
+          tsearch more user friendly and turn it into a really powerful
+          full text search engine. After announcing the alpha version, we
+          received a proposal from Brandon Rhodes to rename tsearch
+          functions to be more consistent. So, we have renamed txtidx
+          type to tsvector and other things as well.
+          
+   To allow users of tsearch v1 a smooth upgrade, we named the module
+   tsearch2.
+   
+   A future release of OpenFTS (v.0.35) will be based on tsearch2. Brave
+   people can download it from OpenFTS CVS (see the link on the
+   [20][OpenFTS page]).
+
+References
+
+  10. http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/Tsearch_V2_Readme.html
+  11. http://www.sai.msu.su/~megera/oddmuse/index.cgi/Tsearch_V2_Readme#Documentation_Roadmap
+  12. http://www.sai.msu.su/~megera/postgres/gist
+  13. http://openfts.sourceforge.net/
+  14. http://sourceforge.net/mailarchive/forum.php?forum=openfts-general
+  15. http://www.sai.msu.su/~megera/oddmuse/index.cgi?action=anchor&id=Documentation_Roadmap#Documentation_Roadmap
+  16. http://www.sai.msu.su/~megera/oddmuse/index.cgi?Gendict
+  17. http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/
+  18. http://sourceforge.net/mailarchive/forum.php?forum=openfts-general
+  19. http://archives.postgresql.org/pgsql-general/
+  20. http://openfts.sourceforge.net/
diff --git a/contrib/tsearch2/common.c b/contrib/tsearch2/common.c

new file mode 100644 (file)

index 0000000..acce0f5
--- /dev/null
+++ b/contrib/tsearch2/common.c
@@ -0,0 +1,82 @@
+#include "postgres.h"
+#include "common.h"
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "dict.h"
+
+/*
+ * char2text - build a text value from a NUL-terminated C string.
+ * The length is measured with strlen() and the work is handed to
+ * charl2text().
+ */
+text *
+char2text(char *in)
+{
+   int         nbytes = strlen(in);
+
+   return charl2text(in, nbytes);
+}
+
+/*
+ * charl2text - build a text value from the first len bytes of in.
+ * The result is palloc'd with room for VARHDRSZ header bytes plus the
+ * data, and its size field is set to that total.
+ */
+text *
+charl2text(char *in, int len)
+{
+   int         total = len + VARHDRSZ;
+   text       *result = (text *) palloc(total);
+
+   VARATT_SIZEP(result) = total;
+   memcpy(VARDATA(result), in, len);
+   return result;
+}
+
+/*
+ * text2char - copy the data bytes of a text value into a palloc'd,
+ * NUL-terminated C string.  The buffer is VARSIZE(in) bytes long; only
+ * VARSIZE(in) - VARHDRSZ bytes are copied before the terminator.
+ */
+char *
+text2char(text *in)
+{
+   int         datalen = VARSIZE(in) - VARHDRSZ;
+   char       *result = palloc(VARSIZE(in));
+
+   memcpy(result, VARDATA(in), datalen);
+   result[datalen] = '\0';
+   return result;
+}
+
+/*
+ * pnstrdup - duplicate the first len bytes of in into a palloc'd buffer
+ * of len + 1 bytes and terminate the copy with a NUL.
+ */
+char *
+pnstrdup(char *in, int len)
+{
+   char       *copy = palloc(len + 1);
+
+   memcpy(copy, in, len);
+   *(copy + len) = '\0';
+   return copy;
+}
+
+/*
+ * ptextdup - return a palloc'd byte-for-byte copy of a text value,
+ * header included (VARSIZE(in) bytes in total).
+ */
+text *
+ptextdup(text *in)
+{
+   text       *copy;
+
+   copy = (text *) palloc(VARSIZE(in));
+   memcpy(copy, in, VARSIZE(in));
+   return copy;
+}
+
+/*
+ * mtextdup - like ptextdup, but the copy is obtained from malloc().
+ * An allocation failure is reported through ts_error() at ERROR level.
+ */
+text *
+mtextdup(text *in)
+{
+   text       *copy;
+
+   copy = (text *) malloc(VARSIZE(in));
+   if (copy == NULL)
+       ts_error(ERROR, "No memory");
+   memcpy(copy, in, VARSIZE(in));
+   return copy;
+}
+
+/*
+ * ts_error - reset the module's cached state, format a printf-style
+ * message and report it through elog().
+ *
+ * state   level handed to elog() unchanged
+ * format  printf-style format string, followed by its arguments
+ *
+ * The message is first formatted into a 128-byte buffer; when
+ * vsnprintf() reports that more room is needed, the buffer is grown to
+ * the exact size and the message is formatted again.
+ */
+void
+ts_error(int state, const char *format, ...)
+{
+   va_list     args;
+   int         tlen = 128;
+   int         len;
+   char       *buf;
+
+   reset_cfg();
+   reset_dict();
+   reset_prs();
+
+   buf = palloc(tlen);
+
+   /* vsnprintf() counts the terminator itself, so pass the full size */
+   va_start(args, format);
+   len = vsnprintf(buf, tlen, format, args);
+   va_end(args);
+
+   if (len >= tlen)
+   {
+       tlen = len + 1;
+       buf = repalloc(buf, tlen);
+
+       /* an argument list may be walked only once; restart it */
+       va_start(args, format);
+       vsnprintf(buf, tlen, format, args);
+       va_end(args);
+   }
+
+   /* guard against vsnprintf() variants that do not terminate */
+   buf[tlen - 1] = '\0';
+
+   /* the text may contain '%', so never use it as the format itself */
+   elog(state, "%s", buf);
+   pfree(buf);
+}
+
+/*
+ * text_cmp - ordering function for text values.
+ * Values of different VARSIZE are ordered by size; values of equal
+ * size are compared with strncmp() over their data bytes.
+ */
+int
+text_cmp(text *a, text *b)
+{
+   if (VARSIZE(a) != VARSIZE(b))
+       return (int) VARSIZE(a) - (int) VARSIZE(b);
+
+   return strncmp(VARDATA(a), VARDATA(b), VARSIZE(a) - VARHDRSZ);
+}
+
diff --git a/contrib/tsearch2/common.h b/contrib/tsearch2/common.h

new file mode 100644 (file)

index 0000000..70313fa
--- /dev/null
+++ b/contrib/tsearch2/common.h
@@ -0,0 +1,24 @@
+#ifndef __TS_COMMON_H__
+#define __TS_COMMON_H__
+#include "postgres.h"
+#include "fmgr.h"
+/* Fallback PG_NARGS() for when none is defined yet; it reads nargs from a variable named fcinfo at the point of use. */
+#ifndef PG_NARGS
+#define PG_NARGS() (fcinfo->nargs)
+#endif
+/* Conversion and duplication helpers for C strings and text values. */
+text* char2text(char* in);
+text* charl2text(char* in, int len);
+char   *text2char(text* in);
+char   *pnstrdup(char* in, int len);
+text   *ptextdup(text* in);
+text   *mtextdup(text* in);
+/* Comparison function for two text values; returns an int. */
+int   text_cmp(text *a, text *b);
+/* NEXTVAL: pointer INTALIGN(VARSIZE(x)) bytes past x, cast to text*.  ARRNELEMS: ArrayGetNItems() over the dimensions of x. */
+#define NEXTVAL(x) ( (text*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
+#define ARRNELEMS(x)  ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x))
+/* Error reporting helper taking a state value and a printf-style format with arguments. */
+void ts_error(int state, const char *format, ...);
+
+#endif
diff --git a/contrib/tsearch2/crc32.c b/contrib/tsearch2/crc32.c

new file mode 100644 (file)

index 0000000..dc93db7
--- /dev/null
+++ b/contrib/tsearch2/crc32.c
@@ -0,0 +1,103 @@
+/* Both POSIX and CRC32 checksums */
+
+#include 
+#include 
+#include 
+
+#include "crc32.h"
+
+/*
+ * This code implements the AUTODIN II polynomial.
+ * The variable corresponding to the macro argument "crc" should
+ * be an unsigned long.  NOTE(review): crc32_sz passes an unsigned int; confirm the intended width.
+ * Original code by Spencer Garrett
+ */
+/* Folds one byte ch into crc: crc is shifted right by 8 and XORed with the crc32tab entry selected by (crc ^ ch) & 0xff; crc is assigned in place. */
+#define _CRC32_(crc, ch)    (crc = (crc >> 8) ^ crc32tab[(crc ^ (ch)) & 0xff])
+
+/* 256-entry lookup table used by _CRC32_, one entry per possible byte index;
+ * generated using the AUTODIN II polynomial x^32 + x^26 + x^23 + x^22 + x^16 +
+ * x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + 1
+ */
+
+static const unsigned int crc32tab[256] = {
+   0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
+   0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
+   0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+   0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
+   0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+   0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+   0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
+   0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
+   0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+   0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+   0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
+   0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+   0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
+   0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
+   0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+   0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
+   0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
+   0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+   0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
+   0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+   0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+   0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
+   0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
+   0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+   0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+   0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
+   0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+   0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
+   0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
+   0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+   0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
+   0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
+   0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+   0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
+   0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+   0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+   0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
+   0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
+   0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+   0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+   0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
+   0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+   0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
+   0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
+   0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+   0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
+   0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
+   0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+   0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
+   0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+   0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+   0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
+   0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
+   0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+   0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+   0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
+   0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+   0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
+   0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
+   0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+   0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
+   0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
+   0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+   0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
+};
+
+/*
+ * crc32_sz - checksum the first size bytes of buf.
+ *
+ * The running value starts with all bits set, every byte is folded in
+ * with _CRC32_, and the bitwise complement of the result is returned.
+ * A size of zero or less reads nothing and yields the checksum of empty
+ * input.
+ */
+unsigned int
+crc32_sz(char *buf, int size)
+{
+   unsigned int crc = ~0;
+   char       *p;
+
+   /* counting while size > 0 keeps a negative size from running off buf */
+   for (p = buf; size > 0; --size, ++p)
+       _CRC32_(crc, *p);
+   return ~crc;
+}
diff --git a/contrib/tsearch2/crc32.h b/contrib/tsearch2/crc32.h

new file mode 100644 (file)

index 0000000..97254a4
--- /dev/null
+++ b/contrib/tsearch2/crc32.h
@@ -0,0 +1,10 @@
+#ifndef _CRC32_H
+#define _CRC32_H
+
+/* Returns crc32 of the data block of size bytes starting at buf */
+extern unsigned int crc32_sz(char *buf, int size);
+
+/* Returns crc32 of null-terminated string; buf is expanded twice and strlen() must be declared where the macro is used */
+#define crc32(buf) crc32_sz((buf),strlen(buf))
+
+#endif
diff --git a/contrib/tsearch2/data/test_tsearch.data b/contrib/tsearch2/data/test_tsearch.data

new file mode 100644 (file)

index 0000000..29a26f2
--- /dev/null
+++ b/contrib/tsearch2/data/test_tsearch.data
@@ -0,0 +1,508 @@
+\n 
+\n 
+\n 
+\n 
+\n 
+\n 
+\n 
+\n 
+\n i8 hy qo xa jl wr le l5 ja jx zf ro vw wd wa cc mm wh fn yd td l8 ec rv th oc ix ir sm y4 gh pr qg ue cx ww zv c9 zv tx eo f5 gd km b9 wb rm ym yl xj u7 xz uk iq tm ux di if uc hc ge
+\n gr ty ph jh po wa iw ag wq r3 yd ow rb ip et ej yl a9 dk pu y6 su ov hf xe qe sd qr zt kp ml ea tp pg dq e3 s3 hh gn hz j7 hb qs qd v0 v4 w0 nu ee wk ez un rd sz wx e7 pn yf gh uh ki kx rb qv f1 bh sr yj ry r2
+\n q1 q8 wp w9 vs ww rq de qt wo qp sa rv mc sn u8 yl
+\n hv ra sa fr qs ps 4w z5 ls wt ad wy q6 zg bd vt wa e4 ft w7 ld es yg et ic pm sw ja qv ov jm ma b3 wu wi qy ug hs wh ex rt tj en ur e2 ut gv as ui dy qy du qo gv cy lx kw xm fl x2 hd ny nu hh dt wg wh rs wb wz yy yu tj ha ak rw sw io h1 ux ku v6 wc qa rv xb s8 qd f2 zo k2 ew w4 yh yu yi
+\n rs tt gp qh wt q6 lg zh vr b8 uy uu lh px jm ww qe xu fp fd rs qu ki dr fn gq gw jv oq zt 2r lc ke wg l9 x3 x5 7g vs ar e7 u2 s8 t0 av dj kl nm u2 zp gf yw ee oc tw a1
+\n qs uz wr gq q9 rl e0 pe dj a9 hp qw aw er kq pp uu pl zo wp fr r6 ej pv u5 hh av lw ko qc pn qj ez n8 wn eu tq
+\n po h9 rd qs hr la u0 um me wp 0p rl mv rc ab r0 fe fj fk qn jh iy cn lb bl ln b5 ll yg yh qt qp uz od dq as gn cr qa wa cu fy zy vo xk eq vg mr ns yy t7 yi op th yo ov pv em tc hg az io s5 ct os wu lq dr mp hk si gx
+\n hm k5 pw a5 qh nb q3 ql wr wt z7 oz wu wh kv q8 c3 mt mg hb a3 rz pz uo y1 rb av us ek dz q0 d3 qw j2 ls wy qq jf ng eo gl ed ix em he qt du hp jc f2 m9 qp hb l4 gy zf l6 qr dn cp x1 oh qk kk s3 hy wg zs ot wj sl oz ie e9 ay it u5 ai hm gh py hz qk ki h8 ja zu qb ei vc qj hg ev h6 yh u0 tb id
+\n qg d1 bt c5 r3 iv g6 d7 rc ml gk uh yn y0 zo uh qd wh ib uo u4 om qg ql yz
+\n hb a3 q5 pl yj lo qy ki sy fo rj kk zq dl wn 7a zi wn wm yr w3 tv r1
+\n ft k6 iz qn qj q2 q3 bl zd av ro wo lk tg ea ew ed y1 ia yl ic g6 po aw sc zm qn gl wq qw zr jp wt j5 gs vt qt yc rr op yw tl ye hr i8 tb uu j0 xd lz vu nl qd fu wg pf wj bt ee wh t2 tp sz um oo tg ha u4 f5 sw pq pr ju qk mh ki zb vj ob cx df hj ef cj q6 u9 tv rv o4 sy ru fq ir
+\n ps ko uk tz vv um t9 uk k2 ja o6 ob
+\n qs nb gh ld q7 jc sp el w0 py qx i2 qe la rl qw tu ti dq ue iv oi wa qr ed t3 fg oa of rr fv qz xn wu wq te hx
+\n yb ty pq az fi qg qn la bu ji lg wg q8 mi cv rl up lg om oq ym pv in aq gg js ha on ww qr bj vn pv he b5 mh qe cc mk qt rb eu qy rw tr qo ec op sn oh e2 ao iv e4 hy dt s6 qt p1 hb ih qs wg x1 bd l1 t1 ro r9 uv wb aw gu os t0 ah e0 s0 hj pe or qj zz ql fd ks qv bq qm bg ec ry oj u8 u0 yj ru r1 yx o7
+\n z4 wr qz cg nq ir bb gb w7 e5 zc pj e9 px uo fp ts aq db q9 iy qe zv xu a9 l1 mb qw tc qu fi hw ur de e4 hk lj wo wf fi ep rl wh vh ek vp oi sv rh ay hj px aa er tv do ir
+\n tr o9 gb tt pp qa qs a5 ps rf q1 kj by ub ru ox co o8 ny wp wa ws rd kk b1 zc rl rz uo ts ig fh db qm q0 bg rr fu ld lr wb en nd cw vr hy rn qr en em au p8 so oh ut hz gq wp ow be ky wj dw t1 pl er wc ot na r9 wl ou un um wx iq sc e8 sn re rr f7 hz h4 ce wz qx wx kp px tl tx ai wq hf ec 6u rz og yt ok yy yp
+\n sa pp a7 qm qh of je qj lo ph wt h0 ji cg z8 2v xs zl mo ik hm on tu d8 av ot pn iv ez ja qn pq wy 7r mq qu p1 tu p6 ti ur pj uy ui qo i9 qa nj xm s1 ya fb 7j ro wn t6 wz yu iq yi go en pb aj f5 hf ug uh hk av pr wl wz im ja v9 u2 ks it br wv wn se ia o5 ox ei r2 ig aj sp
+\n sa tn z8 ew uo eh g8 zt wy 27 ff uh te en pd eh hv 2e wh ty oi sw xx 2p qs mx wb q3 rl eq aa eu
+\n d4 ef ta zq j2 em c0 vv wf kj dw uk ql y9 rn
+\n sq nm kl w8 ur kz c1 pc y1 g4 oi jv wr zy ew by se ec yn ti gq gt rd l5 ej yp tk da qz qx ir wm on q2 to ew
+\n rd gu z2 kj qk bl 6d wy nw xq iu 8t ri uc kq nx ql oa vi kd o6
+\n ra gr he wy q0 ow ti ia pb ha qr lv ms qu pu qw qr ml qt ep sv i5 of fm oe nl xh x1 xz u4 ha ao fc ug pw nh n9 qv kh vx uq w1 u0 ei if
+\n q1 d2 qz zd jd qb wj nt ah mj ea ed y1 et fj qe en b8 ty iv ht fv tn tm sg jb ky ai en us tl ud iu zj ql u1 ci ru iw tw
+\n fr ub h9 pd ub jk vh z6 wu wh wp 5z yt w9 w0 uy om tl rc r6 ax d7 et y2 tw dz se vf ii m3 lf b4 jf vr qw qy uf es qp en tl to ye ue ph e3 uy i0 jl pz oe qo zp wp ft ka zf qd wd kr qf l9 mm wf qx ef t3 x8 ex rg ev s8 ys it da rw al hn tc f6 fv nd nc ad fj nr x0 bx yq ti rx ok tb hx o8 dp
+\n o0 jq un xu q8 wo qq gg ta oj ec az dl bl wb
+\n o9 ij pq gu gp nv qk gg la q4 nw bo z8 9a iw wu q8 eh wi nt jk ut ys c1 r5 up y1 yl py oy ht gd td db qn cz qw lp re c7 dh j5 ia bz dj qr qt wd wf qi rt sv ul uz tl ta yr e4 tm sg pc jv hc hv lc xg xm br vf r8 na wl ou td wc up rj s8 e8 ir ys ii qk p0 lt ho wb x8 bv lw w1 rz ew aa rv ry gx o8
+\n tt hn gn un db fu uq qf d4 q3 pp ji lf wu bx q8 hx kb ny t5 bn hb ex yf ef yj g1 g2 to yk g3 ej sk hy dv qc gj qv sy bg wr na wy bx z0 rc rm ml ug te qp i5 ue oj s4 im oq qt gx sa gt l4 sv at v3 bq mv wd x3 80 x8 aq xk rg yp en gs us dq ak tz al tx o2 dg f9 kv or h4 jy k1 jo h8 kp lt os kh as tn eu ul tm su an tw sp
+\n za yi pe sh pv y4 y5 hy th jg qy qt ke ti ue qk yy ie cq wl p0 lw mf er w5
+\n k9 bt xu kc me is o5 z9 kb gv ur rc oe sk qn ve wi mm rn eu to ue uy qa xf by t1 td t7 aw up yf pr dk cg zr sc 3d at rw ec rl st zo rn do
+\n o9 z5 wy vi ya ea ee fo gf va ov ww rr wr lb ro qq vr gj nw ru ym iv s4 hu tm wo wp zs br fs wg ej du y1 yt yu e7 eb em dd pq v7 cr um ae oz 0z kc tq rw zl rt wb y9 xv tm tq di eo te gc
+\n tt un qs qn a7 qh je qj k0 o1 wr q6 wy ab q9 qm wr ea er eh pi hi sc hs m6 w1 bv lo zr tn yk ep op es ve xx sb ux hg sa gq qp wd n2 zh wf xf wj y3 wl e7 os u4 on ip kn ko qp s7 ly zn ba wu u4 kh f4 zo y9 q6 oh iw tq
+\n qa a4 gu a7 cp z1 he ma q7 lu dp w7 ea rc ee d8 y4 tw ez im ae bv ii qe vb zt lc lv wm ro lk qr hp re tw yv es fp as zu oe qu qi bp wg cp p7 v4 ek rd wc ar rj tj e8 od e0 pm h2 h4 in qf wu wi 19 bj rl rc ee yj et tw ep
+\n gv qd kj cd t3 c3 ih ws rg mc rx lh fd g8 gh cc vw b7 qe at j7 qo ws wg oy t6 t9 go eb e8 us u5 rq oe zj jy oz cj wb be ei pm og se w4 yu xw su yx if
+\n o9 ub rd hw gs z3 ql nq ru wg jc 1t kv mr zm ah dd jk w8 ej aq ig y8 pp fj li wq jj cc qr no wy wu en bx yr qy oo es fy pd tk ix ph yr sf vx pn p2 jq fs ed oy yk os ie s9 u5 ak ud gd uf kb xc u1 xm eu xw 19 wn vh w1 to ee er aa rb rn ru an r1 ei
+\n se kl 7h b6 xs ym tp an ta qb gn uo pt xi cl qp qy op vr ym ri ti tl i5 e1 e4 i9 ff i5 qp jx ht ql uo en pe ku h7 iw wn w4 ey ia si
+\n ql xt wi k6 ew sf eg up eh oy sq ja g9 i3 qe cv l1 qq bv w2 la eu wg ec ef oh fs tb pc xd qs nl qu fn dy oi iu yf re fc hj hk xv zn zz w1 ew
+\n po al hm qk jt cd ju nm li rs w9 ev ut ea 2f r4 d6 ey im pa nu wr m4 is bc xz w3 eu tb ha ft p4 ti to hr dy af i6 iz r4 jb x7 wj xg na rf gi at pn gd re wq qz ze bo wc vz sm zo my ye u7 oh dk w5 is yx tw fe dp
+\n jl za gk cm wu vq jc zc iu mb oe fo fp ic sc 2l hy qr eb p5 pf dq pa fy lc td sz oo aw u1 rj fl tz nx aq xx oz xb 55 y0
+\n uq wr lh jv ri i7 ss qo gy bt s3 u1 dy ox hg it
+\n ps hr lf jx bn qq up eh ab yl pn jg ng bz gd qr yw i9 j8 zi 3v oz at hd cx oj u9 rt uz ro ov
+\n sq ga ny se cj id rg r3 pk kv ee sh ek dk sz pp q0 mn az kp ei qi ry em ph p9 gw hc m0 cp ea mn yf t1 5y wx ol e6 ec u2 e7 uh uj uk av ql lw qx zr qv mw qg cq ww wb pw tu w2 mf ut gk af yo ie ob
+\n hn um a6 q7 af du r4 up tp ej sk lo le m8 rp eu ei qi ky op of tp ur oj hu tb dy qu gt tf oz wc s7 e7 ua pw ax nb wx wy fj wn 18 wv es yq ok w4 uz yx yc
+\n pa qg qh q4 fv qz kx q6 cp gb c6 pr eh id in qw we bk wn qq b6 qy qu es ic s1 og gn wp op qf ic ro os yp rj fj ag oc ay da fv wl qp f1 yx n7 ea w2 ly yj iq iw rm o5
+\n o9 ps d3 lp wr qc md e5 rk w0 pm gx lf ku qt qp to tc pk fb tb qi lh nt yd vt ot ra tg gd zx wx vj rq cr hm ma jp vg u8 rt ei it
+\n dx dv h9 rf qf uw a8 qh uv k3 ri is yr r3 eq uu tz yn y6 qc ps jf wq xe wx lc qr j4 ku xx nb 4z sr tr uq p6 uz of i6 s1 fs pj tc hu qu hz f1 hp lj s4 qx tg yp gs ob tz ds sw pm ug hm ip ql le vl wq tb xv eq w2 yg w4 st o6
+\n qd q4 pa z6 qz ia 70 r3 mb iu es r5 gh t9 cj vz qw mb ko vt qr qt gh qo ty eb kq n1 xb ef rp ek gu rg s7 rj sn ai hg o1 uj pr jt fg v0 tq tx ww bj bm ct w1 zi rn ox iw ri
+\n al rd w8 vp yd yk r0 pi po se sr qa l0 qk ir e9 hm kc rz aa w6
+\n un pq qd a8 z2 qk z5 ws bi xy qx wg wp t4 mj gv qm rg c6 w7 w9 es y1 g2 ej yz gg qc qn wq qw m9 wx qe kr 27 fp fq m7 xp 3p qr rr tr ij il eh au s1 uc fx ut qu sj j8 j9 ya nr rz wg wh eg x8 sl t7 yu vf ay ds ap re dh qg qh qj hz qk zz qx k3 cy iq ox qv eu nx n6 6r lq n0 y0 uq tb sy iw fm an
+\n yv dc qs gm q2 cv ok wt b2 cj wu mr zj kn e5 iu pz r8 pe fp ot tq a9 y5 sz ez cl wq qq wv a7 ln ky jd qe qr yx rm qi ea ln te y9 ev en eh iv tx e3 as tn j8 wf xh co fl nc wk xz es rx ee wh ub aq u1 ar e7 up it iu o2 wl ko jo cu pc wo al hm uq rn ul yz ro
+\n pw na wu jd yf oe qr xr sk wa hw ql wg x6 s9 u7 am
+\n uv tr ub k7 qg he u6 jt gs z3 by tn bi av z7 jc ck q7 2n ny cx km mk rf pj xi lh sf up yj to ia ab tq fq pm fd qc qv ps su qw fu xu cm zb bc qr qt tn ei rw gl p1 xi qo tt ed ef ri iz yw oh tc uy tv as qu l4 qr t4 wx e5 ae op oa em tz gd dq rw ug dr ux qj be ko cg nl je aj xw q1 vv ax rl w2 yt aa u0 eu ah
+\n dc ph sq jt ql un q5 cg lk w9 ur uy pz uo sx qv qq cc ln fu ym ho su pn qa bq pd wj wj yk ou wl rk o2 pt uc km ja wm ry rm ob
+\n gb pw qf we q3 ls q4 sy bl lg q8 t3 wl rg ed io ef if oi hp lo kw wy qw ei yz rt es p6 fp hi qo bn qw wg cy np uv yy oa uo ir of em ug x9 qh nj n8 ea u8 er w6
+\n ij dg cd lw gk wu zl dd eb eq sg ia am in wq xt nk wr xj qq p5 pd pk as sd fn lj jw fk l9 nt wl oo fj sb u4 gs fx hg o1 dr fb hj h8 xc yq ch er e2 aa af ah ob
+\n a2 o0 hn pd iz hw jg q1 jl qz ip le me wi bb r3 z7 g1 eh td sw g9 qq c9 vy ud qo es ec tj uw dq ur hj dy oe zp lk l5 fl wj ys t2 ej t4 ek rs sl yu oa u3 gd pm rw h1 pr h2 py wl 2p s7 wq 6r mi 10 ox o6
+\n i3 qw ee ur cy nx r2 wj t2 ub ir aj cl qm u0 oz
+\n qd qn un qz xy nq an kg hc c6 w8 93 eq ts g9 wy mg w3 rb 3f wf rw kt op es ef at em s6 pc wg bw x1 xl wg hl yk yo eb ud hm hl py wb u4 zp bj bm se sr sy ox am
+\n rc ix qs ls qy at ut pk yo ys ec hs lq xv ks
+\n yb al zf ws cn ac ih th ww vb kt b3 xo qe qi te ea p8 tn qd ci ix xk pk bg rc tl f4 wb rb ru
+\n iy qd a5 jq jw qh sw fv oz cj hc qq ya ee yn pr av or us iv fa qb q9 bh ns d0 qe i1 b0 fh qy qu qi ry os ul hq ri ix e1 ao p0 qt sf qi uh ll ko lx nz sg jz hq sh p8 x3 wg rd sx yo yp u3 pv rq ds tc rr wx lr xb wn ep hh bk yw q6 og yr yg si tq do if
+\n hv qa qf jg he q1 kj qz bh lr kn rj th kz ef eh av pp i1 ar gl ur lr bz xp yr ze qt tn es fl hw s5 qa ed t4 wz sx rg sv e9 fz hf al h1 av bg ym ee yg
+\n k8 nn jy q4 wd lf xu q9 a1 4v yd mb r6 yh pb ta g6 dn d3 pl j1 jk wc cn wy 26 rr te ti fa e4 uy fb gr hb kd lc qf p5 wh au fa iv xo hf ot eg ra wv tp ec yo ah iu pw hj ac h3 py k2 u1 wb rl rz yt er w6 ru af yo ep
+\n qd uq qh qm q3 vg qc c5 rd vp ut eq on yn ii xp up r8 d0 sz qx ue pl lx qe wr qr lm nh qt ha qo ki ri e2 tx iv ao s3 ow kp xf rh ya r2 rk cw nt by wd j8 t1 hk y1 ns t6 wc ev sq rq yf ux aw ch qs u2 zn sm rt wb bk yq dh 8w w3 rc yg o3 yi ox ov ir
+\n u0 q7 qb ml or nu b5 1l xb tr tp in qt hz so v6 dq o2 qh wl nb rv fw
+\n ss jr zf zh xt oy hy aw y8 js ob wq ny or vy fi en tb qi j9 gt ib ot oy rd e5 y6 tg th pt gq wz rt rl ew fm ie ri ir ro ah
+\n o0 qj h9 wy ee g9 gk jd fg qt 3d fu ru iz tl fd tv ad hl wp oo wf nb ez sv tl f4 dr oy rp
+\n ak il k6 qh q2 vd k3 zd bo lj k7 km 5c ut rz yd up ua is r0 qn zq wq j1 qe cv pw fu md bw yw qq ra rw qu ex ik at y0 ru ti yw fz ic ao ow gm jc i7 nf p4 fj xg kr br xk bs mb pk hl wl ta ez sv e9 us om rw ap gq wl k2 qz h8 gu kf et ru tq ag uz rp
+\n yb az dd fu rf hw qg we u9 o3 q5 q6 ag c2 o7 wa kh w8 vo mc yg tu ua uh ta tw ih hu fj su bg ww bh kw ry ru wy ky wu wi fw 20 b9 qo ik oa ev hw s1 e1 e3 fc uu s5 tn qy hz jc do ou jq gb kf pf xl x3 yv lz iq eb e8 os sn fx dw qg ql wc ka n8 gf ly se tv yk di si o7 r2 rp
+\n il mj vi sd ia y6 wq rm p5 ux ho nr ef ej wq iq fn
+\n ft cs uo io er ic tw ig mm c9 xk ab ze uw i5 s1 e4 pl ui f2 lj p4 sf x4 kz ej ez eb ov of rw dy av qh f0 h5 ki qx cx eb og gk oz uc
+\n ul io zd kn w9 y3 wt qq wp jl i9 jk ca h5 wx wb tm do
+\n iy hv cs a2 ee yz y6 gk kq em qy uq ts w0 rq rr vt pb nc q5
+\n qn q3 vt vu yk ej fp tw zm qq qy y9 hh wo wg rh ep x5 wk mr el l9 av hz w5
+\n hq qz wy cx rh ur w9 e8 r4 fq im fj gj dm qn gl jn iz l1 yh mz rw e2 qo wh nt wk zw t7 e5 iq fh eb sn ud az uv fh sv dq q1 ku zs eb ue xq rn o6 do
+\n ub lo sq wr d1 mt o7 ts t5 rd xe iu yg ot gg se pp qc js lu xt j3 j4 wt pc vz 5o yr qw zw qr eu db sy eb em fo i0 ad gw m9 ig ih lc od n4 pg rx bi ni kq wl aw e7 az jo mk bo wb ei mi ep wb eq di do
+\n q1 ub xt db wt ws ik pl ee or to ej ic is fr jk ls c9 qq yg qt eo rw tp p8 dy pz gm hz or xs bt x8 t4 t8 s7 oj lt wv vx u7 w4 et ox yo
+\n po o9 ih dx qa rf qf pd d2 kl ad lh kb bd qm bb b1 z8 ew d6 yg d7 ym ti eh ic iv oi y6 sz dx qn ut qm gz pj zw jj 4d bk wb lm xb ke yx oo qp yb yn en fo yw fp e4 aa fd jz qu gw qa zs nl v9 wf qt qi vg ni wx hk 9f sz tg t0 ga de re io av h2 jt x0 h4 wx wc fg rb rn nc yz iy zp ds ep zw pr xv rz yh yk zp do hc ep
+\n hb ty z2 qz qz zh gw mg kb ve zz ti tp py el jp tg qc ar qv gx la qr cn lr nd ng ve qt 6g ml op pd uq uw eh i8 uy dt ho j8 wp wd qe xm w0 x4 qk el e9 pb sm pn tc gt ce oj jr mi ds wb ym ew u8
+\n ij yb hn u7 cd gj co dp lp b2 r5 ed ti pn qx g0 jb jn jj we bl ri ot pi rb yc sv ty oh ph hh e4 hy sd wp ll ft l7 wh ca ys wf wb t7 sv uo sb sn ha pb sw de un qc bz wo en as tb eu af eo
+\n d2 k0 wr q4 q5 c2 sj iv pm g8 m1 l1 5s ij aa lb xm vf ej ta ar th od sm cw gy bu qd q1 u8 ry rn
+\n qa ux q3 mj ex yu zx rk gi rl ya is py am tw ja js db ps dn qb qn gn lc pe qq vr qr eo qi ec oa ev uz yq of in ho qo jj jk wk wd zp wf lz t8 tk ha pv fz pn ug o2 pe uk kv gq v7 oi qv wv dj tv fn fw
+\n dx a3 k5 um uq jd og nn q5 qx cu wp rd ws d6 px ac oe rb up tp ej ek ih ff qc gj qm xk b4 dz jg sq jh eu yx eo re es ul yw tp i6 pj ho qi qf sn og xo yv pk wj wb go ar uo eb ir iy pq uh qg h6 vt wv sn n0 rx af uz hx eo
+\n yv ub ty gn gu fu dm ca q2 d4 cn ad iw k6 bf zl zz 2o w7 uo ee yk ix g3 am fw oi jo se ha vs qn iy qq 24 bl j6 g4 cw jv 1l ei qy ke j4 qi ep of ao hh tb gm sh lh vc uf vu wd p6 xm qt kh rk l9 s4 wh mr t4 oi rf iq op ox u4 e9 fk u5 it re uk f0 kb nd qk ce jp lr cy js qd qb sb tq n7 n8 ed ue tn ox o6 id r2 it
+\n qa pa jd qn qg jt gh q5 lg ag qv ah qn vr da rh w7 b2 rz rx d6 d7 eg eh yl a9 ek dl tw sc hp ha su gz lo qe le ns kt qy qi 1h kp mz qu es yb yn p6 eh fs ok as im dy px gq qp qs l6 iv rl zw dr 4r hi wj rp t6 go s8 e8 at e9 f3 ak dg f9 qh pt dz ww rv wb oc pv be wq cs q1 xr xx eq yr u9 sr tb yl tq if hc ig
+\n a5 co dh bt lw ck lh w7 3e mp r3 rz yf yh uh eh td y8 fg pa ar va dm su q9 d5 qw re vh he jc 1g ib xz qq qw yg vt rn rb cb ry ym em i7 hr ff f2 qp rd lx wg lb kh va jv qi xd wh wc el un sz tf gu oz ae e9 e0 iu dr io dt fb dh jo um wx s5 oa kx ly rn oc zy f3 hb tt wb u9 oz hx if ig
+\n ak o0 qd q7 eq g1 y2 pt dk g8 qb vs qe dh 5i pt yh qo ul tp oj sp oq di uh zg xn rx tp tf ie f6 cg rv zm xw zq 5f md sr yk ru ro
+\n a2 tt ub rs ij ml ow pe el gd va ue zm sa pq lc yw qi qw lv ep qo uj ym tl ye hj s6 uf qp 82 fk y1 wl oi t8 fk pb tx o1 sk lm oo xv n2 ad fk n6 dp on q6 rv
+\n qf jf kk nm oz q7 b2 xo fw kj rh ua oe yl gh vd qe gn wb pt wi z0 se gj 48 of i5 oh so hz wp ae wg nc kg xf ev pv ov au iy az f7 qb q5 eq yr tv yy ol ry o4 oc di ep
+\n po o9 dc a5 jd z1 sq ws b7 ti r9 sl ez aw tg zm si ng qe ky b5 pp eb od jl ff oe ce qp gy yv qk r4 xf kw iw sn tx gg uh cq ql qa 2s mt eq rb dp
+\n qs qz cd dl se q0 lv eu yi rw qo uh uj ul en tx wo qd e6 pv gg je zx kp qc q3 ye en
+\n un qs qh se ws lf so eq yf ef y3 g4 zb hs q0 no qw j2 y0 uu fb di f1 kq oa ul t3 ot fh ak yf fv dt f8 jo sx wx at wn cs lq zc
+\n ub qa qs ik pw uq a6 pd dm d1 qm d2 qk cv zd bi wd ne ah qb kg kh ij 1p rk w9 wt r5 d5 px uf eh yk oy pm i2 hp st qn si qm zw we ls px lr ri qr sr db hp qu xk fy os eg en uc ur i7 sa hp vn qs kw dn od rh xj w9 wk ph ap yh el oi oo e7 gp ay s0 f4 gf az jy qk ql qx 1k v9 qc jq zy n5 kg hd ww wv bj hj ur er rn ry eo o7
+\n df a6 dn je ql no q6 ox wo zl bn rh ya mv e0 yn pr gd pi y8 i4 c7 g1 j6 wo rv eu xg eo c0 yx ea sv os wp qw wl ou un t6 u2 os of f6 dt f0 jt wc ja ae qv rm ds pq y7 qk ck aa ux
+\n db iz jk zd wy wh c3 zk 2o rj hw vp on ed ac to g2 r0 id ta th qb dm pj m8 np oe pu bb tc gh ml rq uf tu eh ye tx gv pk jv j8 lk xs kd fi mx be wd t3 mr wk wl td eb ie tz dw rw pm re fb dj h6 ql wz wx qx qv u3 vz xe ex 2w ty ew xm oz an
+\n ty a6 we wi ro lj bn rh r5 g4 aw jd q0 gz xy m6 wu qq et oo ex qp tr y0 fi au e3 oj gm px lg wk tu ek tg u2 ov em dg uk nd qj cy hp wq mi bj q4 ia fm r1 ei ie ux
+\n tr qg h7 qk jl kc jr am mo w8 e8 td gl kw sd jo qr vl gs qe b9 mm fh eo uh ft ik e2 i0 uu ff qu f2 jq v2 wg kg ek aq wm yi yo s8 e8 sq ab cw wt ck pb pn xl bj yq wm ew xq su r2
+\n qk wa q6 jj ws ut gd gf ly ec pj sa pd wl e5 wc da kx zk zz zb wv rm te
+\n jq uz nv ql as jx z8 q7 o7 yt rl ea e0 ym y2 pr ia sz sq sr qq qr vk oe pe lr bl ll rm yx y0 eg ti e1 ue uu ui jx zd oq kd rg lv lb r1 fx ro me ts ay f6 fc io qg py qj qk qs ky qh y9 ok o4 am
+\n qh dl jt wy a2 yk y2 i4 zq kq we bb dg m6 qq zw rt ta tc ff xs xd qf g0 1d yg du wz iy sw tc dd hj hk mh ov zi wn hk ee yj af
+\n ra ak uw q3 cb ji wy fw gw t4 mu ts qw ww rj vo rl yd ug d9 gj i3 zw qw wy md qq bv rn qy pd tl ic p9 hr dr hh ui sf f1 i6 ws cy es ef t5 kr ek oo t7 ec e8 u4 od dq ji ch jr zm jy q1 zp yq 2e og yo tm
+\n tr tt qa qd jf pg qh jr sw ao q3 qz za wt js bl vw q9 ws uu w0 ya pl yf rx ee tu r7 gg dv it lo ww up js qq qe lz eu qy rr yb ri ay ye ta tv im sh ss uk qd qf bw ro sl wl t5 e5 um th ha fx re ii fv je hk ot cq km h8 ks bk vl qn xe te tt rl u7 iq ry ag dp o8
+\n sa z1 qj q2 nn wr z5 mq xu q7 gv t6 w7 r4 c1 mb sd ed ym ot ta ht ts tw gd tf g8 se ar gh fh qv qn zm hs qw qe oq wc xj vz xl wo rn i2 sr rq at yq uw s1 tx hy fm pc wo hv gy vu wd lc ul p8 wk wk el oz oa rh gp pn gf fx fc f7 rr dy x9 uk f0 py wl v7 cr ch qs wv nz lv lu 5o xe ym ly er yi ia gl ox r1 dp
+\n uv qd hm qf gp k9 kj we lf bs ej 2i el wl t7 rj w0 rz yf ys r8 tp py tq tw dl im qc db qq sd ry c9 oe if qw aw qu uh tt p5 p7 p8 oj zi oe qu qi lk j9 sk zs ka lc wh wk zq mq vh t2 ej r9 mr ez t6 e5 op rk ga pb dq ap f9 py qk qz wc kd pv bd sm dr u7 mf o3 yk di r2
+\n po a3 uw q5 q7 ck kb zj td zz yf jd wq xh ld qr w4 p1 ij fu tp qq sv y2 yt t6 e5 op dw iu pw jp ka qv 4u qf rm vb w6
+\n fy a6 qg cs z3 ql dc jz wy me cj o6 ba kv wp w8 ea r5 uo fw ib ig g7 gg sy bg qr cb cq ro xl xv ex tt ru pd hg im oq gq ao rl pl aq sz t7 e6 os uf ug gg pr ql qz vt mj px wb ci qf ov be bg ww mp mi rz u7 w3 ei yc
+\n gh cm ca rg uy pm y7 g8 lx yc qi re uh yn uq eh tz ph wo cr sv fp kh sl oi sx ov ga iu h1 je fd rv qv wi jy yy ry o4 tq si
+\n qd iz qh q3 cg lf wy xa ez eq om ug eg yj fo fp yz qx qe wb or jg xb c9 p4 tj y0 iz tc oj tb i6 p1 ka zf qe yp wj mv ra ez rd uh pt zl lm sz wc lr bq oc zq sr af gl ei ux it
+\n a2 qa h9 qh q3 fn kx ve wz us sj yz fd g7 vh c9 xq xj ln tz wp wf kg kk by vf j9 5y un yi e6 rh sn tx hj kn rb
+\n un qs fi qm jk js bd o8 bx vt eq ya xp yg pz ym dj fp tp ta oy dl qc cx qq m1 rt wn d0 wm yr qw aq qt tb ha p1 uk yn ef tj gv im sd hk pz jx zi wa wf ba l0 wd mq ej wv t5 ek iq pv ov f3 em ak rq hn hh f7 uk qh ot ju ng ji h7 wz cr v9 bj bk rc er ia is iw ei id ov pu hc aj
+\n pw gm qd jf u6 z3 jl q4 wt bi lr id wa wz w8 ya ev ew r6 yn ee io r8 ip ej td im si j2 jo d7 m4 pv iv yg qi il ti i6 ta ib ap fb hz wd vu wf wh kt kh og kk nm rp ti ek ns t7 y5 wc ae ir pv hf ub wc ho wb wn mi rn w6 yk tm te rp
+\n o9 un h0 a6 pf iq xi tg w8 z7 r5 om oq eg or y4 sr fh zv vw zq tc ws rq db rw eo ym tl fv i9 pz jx j7 hx oi qf x2 l9 x3 qj by to el sx ys yd ao az uj hl dl tj nz wn kg kh on wv w1 w5 gl ei
+\n gv az ql nm rc r7 yl ja zn q9 xw no iz qt pk x4 l4 tg u3 of zk wc go qb mo eq u7 tv rm ig fe
+\n o0 ik qd um qg k9 mb bu wy bx ny ws hm ea mb iu pe eg ey sh uh g2 ic iv aq td qx ja qb ha 2j lp xr wc vl wn wi sl tx qt rq ec vw of uw sm ic qy j7 ns hb 2q kw wf vp 1f x1 5c zs y1 rg tg oz e7 fh eb ie up e0 ap ve wq zz cr wz h8 wv go ly fk az pr rz h4 ew w2 ok w5 ia si ro am
+\n dv d2 cd qc zt 25 xp wd te es sh eo wn f4 wo tv oc
+\n uv qd qn hr bj b1 mw lg io sh lo qq xh m6 28 rn xx p7 im qt jn jw bm qf r1 mn ny ed em ii dd wn cz ds vc wb hh q4 yw ur rt ie o6 ux r2
+\n rs dh z1 jr cb vq r3 eq om y1 sg r9 if tw qb qn m2 vy dc b0 ik fa ib aa jz qu sg qs 1s be of w9 oz sv t9 oc pv rw dd o2 dh lq ka lu 1m qk q4 y0 ye yq w2 w6 si ob it
+\n o9 db iz fi qn qj mw 3v wp li e3 km zz yy mo ya rz mv yg r7 yh pc or r9 pm a0 td ih db lt pg jf gl re ww qw m4 j5 xi wi yd nh yg qr rm et ey ug re rr y9 y0 sb tu od ay of iv oh i8 ok uu sf gq lg wa nn uj qs cu kd xb wg cs 3k yv hk pn ii hh cw km wz wx wc n3 jr wm qn bd zo mo jo wm q6 w3 rt an hx ah am uc dp
+\n ps pt vq kc bs vu vo xu ee ib hv wj x3 nu ud yf qh wb lb gz
+\n ra o9 qs ty rd ps db pu qj u6 k9 nb qk oj ql wt jx bo ri xo o0 mk rh bm mj ut mb rc yn et yl pm ih i1 g9 qm xq oq bj wt jp xu pt bc eo ep qi ky sv uk fy ri i5 im dy hk ui tm f2 uf ug qd kf nc s3 fs 12 t2 ro du wk ek yt ej t7 s7 oc ay s9 pv fl s0 gf tx fc ac py v5 qk ce qz cr jp ck hp u3 zm xr ii yv ea cf hj ye w3 tp do tw ux
+\n o9 qa ik wt kc q8 wk sp yy w8 w0 ys ea om tu yz pn fe ae g9 ps g0 i4 qb fk qn qm ut j1 d6 4d cb vj vk xy j5 be wi ve qq gf qr j3 ug qo p4 sm s2 ut fd pl qt jz ui qy qu qa nk fj iz xh wk iv qz fb ro x6 ti sl rs hc oi wm us ai dw o1 hh ab qh qj ju ng wl zr vk tw 5a vc hk md yr u7 w2 yt tn eu ul ah
+\n uv ra qs ty jh q4 o4 bc vr o9 rg jz mc r4 ui g1 ey g3 sj am sq fj qn it xt ln dl jh b9 g8 dv qt yz ea ue ss w9 wj kk bi ym tg t0 ob ys iu uj qk nf v6 nj ox qb wy mw 1n pq eq w1 rx rp ge
+\n a2 pa e4 xy yd sj vs jj xj lm qy qp ri ux p8 pj tv xs wd wf oz gd sw rw uj uk qj k1 xx um eu bx my em ey
+\n az h0 qz iq bl kb xp yf y8 qn rv hx oc re gt k2 bo qg cf rl
+\n yv uj d2 mq hx ws w7 mv yn r8 ab an ae jn xw al up be qr zr ep re qo ec ur ap hp pn wp i9 rf wf vo qk t8 eb yd uk kv ww wx wt ox kh mi eq yj oz fn ie am rp
+\n ik df qg jg k9 wy kc ro wi ve bb rl ew io or eh sq oi qc qe d7 m4 pu gd db oo yv yq ix eh fl pg ib hu pl cr fr xd cy ke mx yh wk ag hf hk qg sj we mz gp u4 ak ma rz rv af ox di yx ob
+\n hn pa pw qd il qh q1 z3 wr t3 wo ws vu uu ld pe fo dj ot dx pp vl qq rr ls j4 fs dl ve c6 rq ln xk ec rt ty ik y0 tu yq fz s5 sd sh jn wa uj ws lx qr ca rz wd nu ek yy yi y6 uo os up f4 fz qk h8 qc wr at 18 ca ww rv sy ox o6
+\n dv wu wo uo m2 we rp b6 qe ik e1 bq w8 x5 ez fh u4 iy jy wu li f5 u8 w3 yl te
+\n sa ds qd no q5 ra jd qo ru r7 uo ar ud on ak fv dg wl qx qv ye yl ep ge
+\n ss rf qn bu gj rj uo yz tf m1 kw zr oo y0 pf tc dy qu v6 xh t4 oo um df je qh dz v8 ho wn wo 0w dj rv o3 du ro
+\n ij uj k7 me lg ih hv ws rj pl sd uo y1 yk d0 pt y4 g4 ou tw sq td fj ha qm qq 4a kw d7 xy m5 bx c9 yx nw tr qo uj fu en p6 s1 ht tb qo zp kq x2 wk wj wk yt wz sz ae iw ay fk ao ug pq qg k1 ql xx qc cl qk 56 bn oj yt et ut uy tw ir yc
+\n k5 pg cp z5 wr no zd tk ej an qx gj i3 su we up 3q yq fx ib tv qp ik wj yf u1 os rk jt qo qx n9 w1 rb
+\n k9 uv gs wr 3b mh km bm we w9 es or yk r0 g5 aq gf nq qv ll m5 yd zq qt qp sv ed p5 of eh i7 pz hl sg jn wa m0 nm kf w8 wj de e7 ar iy pn ly wn fx w3 rb ey am
+\n o9 d2 vg gk ex rf rc hy qm j4 ga qw rm ls yl cm en tl tp fp tb i3 qy qo j9 vn zf wf qg mb kj qi jb mq wl rj s8 lw um zt wb f4 xw f9
+\n ra go ls qx wi c6 b0 rw g1 yz fe g8 ow qq ra mz ex oa fu iz tl uc e1 p6 x1 tf rh tk fz ap hl qh k3 xb mw zm yb yw q5 aa rp
+\n yu qx sc xe j2 oq gs i6 i9 l0
+\n yv ss tt gu fi qj bt ql ls io nw gk hl up zv gl ni xt wy dz qe ud nw rw qu uw e4 qy px qf zw za ty ek t7 pv dg ho wn uq rx yx ep
+\n ga 1w ld wy o7 xr pk r9 g6 hu jg lx sd no xt wr zy ku l2 nw 9r rt i5 to tp tc s6 f1 ud ko xb rj qy es t0 f4 fx ii rr hm hj fb ji oi n1 vk ci 9e mt yc 2r tv gk yp ux
+\n hb hn k0 wy m4 w7 rc ts y6 j3 qe ve qy rt so di qo dp lk xf mq wl em f5 pr wl wn 3k ew yt w4 ri
+\n qa ss lq wr bx t3 r5 ed eg sx dn we 7n ra qe b9 rm wd rw eo oa ri e1 e2 ut ap hu qo ws uz ai tz nl cu wq ln wn ie aj
+\n yb rs un hm dg qm qk ao mw fn kv ur uo pj e9 sf ia tp tw a0 td sx fg su xq m1 om na vk wy xk em l1 z0 nh b0 mz qy p2 ru au iv p9 pz ug lz xn wf xg fk zu wj wd u1 e9 tl ak hf sw o1 pw dt gq v5 lm h7 nn nl wq uq zt o3 ad ry id ig
+\n a3 pp dv qs gn u6 jy kk io dt wt ck rg ua yd ya yh ax ac y1 pe pc pv fp fw dc qv zb dn q0 ju jj m1 ui lx qe qr t9 ja he wi vw m7 l1 jn qe wa qt xg rq qy yi qu p3 yb ed en tz so s3 tb ho fm px gw zo lx wf sm mc dq wj yg l4 uv yk tp t5 wz ol fk f4 pe kc dj wz qb zm wi br zk ww ty 6i vm eb lt eq w2 ey yp yz o6 ei
+\n fu ga io nt wp jz yf rv oe eh pt dz ih sx qx g0 qm hf xe lz gc d5 bh 2z cn d9 1r sz li bv qe 6d bb er xv yx p2 ea tj p5 ay uq dq pg oh qt s6 px sh ko qa nn oa bq cs kk hh cr wg tu y4 t6 e5 oz th sn ov u5 dw qg dh uk n1 zr qv 3d n4 yx xe wv eb h4 yo ro hx o8 rp
+\n ra gu hm a7 jw qm qh jr gs k0 ql xt q5 dt ru wy k5 wh fw lu kb am m6 bx vy qq ev rk 2s mc yd es io pc pv g2 ek tq tw in ih ae qc d5 ui qe wt qq m8 vr nb ee hu rw rr tt ed fi em e1 e3 hh hi sh zp qo wp l4 ws qf qf pg eg to un gu u1 t9 ox e7 u4 od ds de hh py ql h7 gy js vz gf y8 uq se do ro rp
+\n qs qg gk ta bf r3 hw r7 r9 sh ua g3 sq td g7 ha lu qw xr wy wu rz ko bb i6 uy as di qi za hv jw rf 1f 2u va ap qi rc du wk yt t7 u2 ob re ax v8 cg qb wy wn kg pn yi rn ru
+\n gr ra jq qf go ga jh gs q3 tm q8 k7 o8 mj ym er ip ua ej hy i1 dv qb vg cv m5 wy xk g5 wi ng w3 3w ud rw ug ep hq ta fc fd aa i5 hx qp wp v7 qs l6 l9 l0 jn ty t6 ie rj tk od ys on pq tc zl nh qc xv wc cu ks ei lm vm cj yi ad r1 si sp
+\n qa hm a3 ac q9 na rj if qw rr vw tj ib su qu wo dp j0 wf pf 2y wk ym ra wb ae ga gs f8 gq im ar pb ec f9 yu rm
+\n t8 ej an y4 td ez ln z9 lj qy sm uw dq us cp nu tg vn
+\n qa ds k5 hw k8 k0 ql hl 1r wi fw c6 w7 mz rj xy r4 e0 ym yh eg r0 us fs ib oi qv q0 ww lp gm ln bx nc qi l1 qe wg ea qo eb tk eg to ur jc oe dp hv wa 2q nk at rg wf wg ca xk jn pk yg er ot uv wb aq ol wx e6 ev sv uo vf eb ah ud da pe qg jr kn ju ng ae wv n3 iw ly kf cl pb wv tt vn eb vm u7 ew aa w6 rm gx r2 o8
+\n q4 q6 vk d6 eg pc pv r0 tw i3 q0 we tw sm e4 ow sn kg up hm qx zv nz wm u9 ul ri do
+\n po uv dc qs qd hm qg q2 jj sw kl me q8 xa wa xf z5 yd r4 rq sf px ti ia r9 yl dj dk ek qx i2 sr qv qb lb wi nf wu qe tv fh qo rr yb fy eb ri ai ok qt pc ud qi qa ws qs lc zh nc x2 cs t2 di ke wk sz oc yp s9 ys ai ln wz cg wc wv os qb f2 ec y8 dg wm rz yr ee rn sy du su eu fm ei o6 dp hc
+\n ft qj q6 wq up ut lg er uw db ll ws of og e4 1i r0 wx fh th vf re hm zi
+\n a3 ty pw ph cg uo r7 oi q0 lb c0 vl xx mh hu b9 qy tt sv p5 eh to hh ow tm oe si sk oi gt kq cu vi j5 wf el tf yu u3 ya uj dy qh ql ct wc el y0 o3 o6 if ge
+\n a3 dd by wt lf 2v bl 7c bn cf yo go yf ii et ey yl aq aw g8 ho i3 qb dn qn lu vf vg 2k le ml wy t0 4h xk qw b7 bb eu xr qu tt y0 os sn tl pf og tz tx pl ss us xd cu oa qd xn ke qf vp kh ny wk r8 ej rd t7 sc e6 rh ud tx al gg re hj ux qj gw xx zv xm iy ca vb yw en oh u9 aa w5 w6 ul oc an uc
+\n qa un dn hq hw d1 jr jy kk kl wt kz zf z7 cm q7 me xp wp mj rh ue e7 ys rz eq ew ed xp ee yj y1 to fw aq po i2 jn li on m2 na vq wu ck er yu db yi gl ty eh uw fp tx e2 fs uu sf jx oe jb qp cy bn qd wg 3z nu j9 mr t6 gp pv ha tl ai fx uf fc kx qh gw xb zt qv qb ir cq vb y9 ct ol fn ah hx sp
+\n db wt cm ch jc wi dd ys on td po y8 q0 wq kt eu tc tv or fr s3 na e7 uf gg re f5 tt aa tb ie
+\n a5 jw qh q1 qj oj xy my b6 es yg yl y5 zm pv qw qt qo ea ri ao in s4 gv i0 ad lh wa qf gm rk vs oy r0 ez ab lm qs qh ry ox
+\n ga ca z6 nr wo rg bm vu uu rj e0 ui io pe eh d9 ab tw fe tf fk wy ln md rk sk qq qw hy kp dc qy y0 p5 p6 p7 ic pg e4 jb ge wp qa bn xf ks zi oy e5 um wx ie yp fv je ng oj ja v9 bp qv er an pu
+\n a4 jw a8 o1 q3 un gh le nq q8 ig rg rl ea io er tu fp dk hy sq ae lo qq wt wy 5i xj cw dz oo qo yn ty y9 y0 sb ef tj uw ta ur i8 tb oq af hz qi wo sk zs qs vi wf kt nb y1 oy wk r0 ol ex ec tg t9 eb ap fv qk ji cr s6 et xq bf ep mi ax rt yu iq af am yc
+\n gr uv hv tr a3 qs lo u7 jy qz kl z6 gk gz ag kn rg k0 w7 wr rl pj ii yh up ac ot d0 g6 td y6 fr se pp dc g9 cl gx qw pl m1 ii qr vz oy nf eu eo p1 os y0 ri ix au uc ai fx p9 nm jj lz pa kw ul rg gw qh 4m eo qc l5 rp oy ej un yy yu t8 sv ud fx ac dy av gq qj ve sl bu th u3 rn nz n5 zm yz bd tx el ex n9 es rt rz rx ol sy rn yo
+\n ph cb jx wu ib vb ih ty oy tl vu
+\n df qd k7 z2 q2 ju jz zf cm mw yr gu rx yh ym ef pc qx jd q0 ow pw wt rj xo mf xl qq qw ud tw ku ik oa od ti hk f1 xs qd wf dm s2 ph xo ou sx ae iw t9 eb u3 rk ak hf dw ax oe zl zz wm sm el cx cw lq za tu yw rx yu rn fn yi ei
+\n a6 ql wr jx z7 wu xi ym fo if a0 dv ww lx zv dk tu sn hh ff hu zo ws rf wf aa ni kq uv t7 um go e8 ob sm tz hl uc zz ol lr kc n6 bk ry if
+\n qs gm tn rp iu pi qe ec to l1 wh ra wl it kx fd vx q1 ri
+\n gu pw qg we d4 ws q4 cn q5 me qv zj zl ex wr xo yg r7 eg et ey us iv po aw se cx az lb nz nc qq ew rs rq yx ep tj uq eh fz hg gv jc di wp sa nc ya cs fv qz ti wn aw e7 ox u6 pn re o2 hm fv qg hl dl v9 qv tz 6y rl ye rx ur tn eo
+\n gr qj z6 ld tm jw hc ed y5 se ke ht tn jb 12 yt ek ao io wv ew ey fm tw ir
+\n gv fr ak o0 gb rd dv gu qf qg qh jg ux qj ph k0 oj wa jz bi ja eg c1 fe qn b5 rs rg b1 vo z7 us d5 r5 ii tu yh y1 or ek sl pm hy dc th sy ww ze vb wt m6 iv mj qe 6f qt gk tq ru yq au ap dr hh qy sf qa ik kt wd rz ej t3 ot ej ub wx oz th s7 t0 ag ga pv em fz sw o1 ip qh nd h5 et ho cu yz tq wq wn et tn yo si ov a1
+\n ij rs rd qd pd qg qh z4 ql ip nq q5 xu bz lh o7 my 3w xe ws 2p w9 rk es er d8 pu y6 qc gl bv qa rv qt rb os ru fn qy qu hx or wa qs at zg mx xo bg yh ec os eb hd rw dw ip vw ki ok qx cu wb sn wm yj o3 tm ei ah
+\n tr rd pq qd um qj u7 q3 cf db k4 gl mr gw c3 bs k8 vi 4v kz cg rz et ey tp fq y5 el gd dx qx hp mn cz wq xh m4 av t0 vz m6 qw tv rq ei il sb tk eg uq tc wo qo zs rd nx 2y fo j5 l0 l1 hy vy t3 t4 yt va y5 rg e7 uo ox at ir ys hd uf fx re rr ac kc cq qk cw h6 kp xn zu bd cz ca pn pq w1 rx vc w6 yo is fw ir ov
+\n ra ij a3 qs qn jf qm nv cs cv kz q5 um q7 q8 km ya ys rx yn d8 sh pt fe se js ue rk m7 wp et ei qy to tz s4 af 3i lc wk ej hc ex t7 oc sm s0 tl fx re fb jr jp qc kc jr cc w3 yl oc ob ep
+\n sa yb qn k8 lf d1 c3 wp vr wl yd iu kb sz g7 mn jm lz sd m3 lv qq j1 ex qo ry ru em pk i3 hi rf fk nc wd vu yt td sc tg s9 tz tx dh x9 qh ku dz my yr w3 oj se ei gz tq hx ah fe
+\n w9 rl rc or fq a9 pp db gj hs lc qr ec p4 ph hb x1 ez u5 qx ea 6t tn
+\n a3 qa dd qf qn qm qj vj wi ag wo ig e3 wz r6 d7 ax pe rb ey r9 is ot tq oy if hy se qx ar qb vd qw qe np xy nd wi in gj qu y9 ev ti tb qt px ud wo ll cy wd hw kh fp wk wg wh ym vo ub rd t7 iq yo ox eb yp ys au u6 rq ii io pe qh nz vl be n8 wv hk og rc er yu u0 rn yl is do eo
+\n dd nn oc el yu tl rc rv r7 y2 hi qc qm wu cq qw xc kp tr fu ib zi qu wp vi ci qj nu zw t1 wl fh ev os f4 f6 f7 cd zc qx zy wu bs qn u0
+\n o9 gr fu a7 qk xu q7 wp el yu fp ou y5 pm pp qm jm st op uc fx tn hl zs kp bq p7 hi ys qj ki qc qa n6 oj ey w6 yk si
+\n q7 xo sr he uu sd s6 gy ws iz fk sw al v6 lq fh ie oh uz pu
+\n of ch zj rk rx rc g8 i1 jk tv ul fi e1 ic sp in jl jv j7 nm rp r8 go hf wx tb oz tw it
+\n se kc tj rx yh eh td pa zb qv c8 j5 ri eq b9 rm ik ev ul ti p6 en ok tn wp jm ws ke br wj rp en gd rq f6 ac ab zc rz ew tb ro
+\n qd wr d6 i3 j1 ww if qt yn fd e4 qf j5 yh t8 u1 ev qc wv pw u7 oj ok yz tw o8
+\n un pa jd qh qm dz pi z3 ny gs k0 wt xy z6 cj k5 gl bz d1 fq ye yr rh t7 ot if g6 im pa ps fk zw lx kr lv na wu vw eo cm te qo tr ec ty sb y0 i5 to ye so tc i5 sg ct qs sc ws qr xj 2u n5 rl cw dw ys qj yn qc y1 sl t9 ox sv s8 ya s0 tl ys rw tx fx ds rr cq cd ql qa au qg vg rx yt iq tn yl uz ei si r2 ob
+\n rs gm qk gg m3 rj eq mv yf sk gx ve eh iv i7 n3 pb uf gh uj tg ox ww bg oz su o6
+\n ih a3 uj rd qs df h0 jd d2 kj q2 ap wr ol nw bz q7 fq ir ra w0 eq ya r6 d6 eg ej pn py pp sr qb jb wq ni xe we lm be xo w2 qo mj qr hp tr qo qp ef of yw ai e2 i8 fb tm do dp i8 l4 wd p5 sn pf gr vs rx kz vh t2 wj ot ar t9 at ir dw qj nc cw fd sx qc mz lt pv br wv dr q3 yq vn ye yw dk oh rc w3 yt se ov ge
+\n ds h0 jw he qh jr jt ql me na ah xa tf wt pj pk om 97 rc yg ym oe yj eg dl fe sz g9 lo qq qw wx rr c8 ns vq m7 xl gs vr qw qr kq qi qo eb tk ue dw e2 i8 i9 hy hh qt f1 pc vv qs bn ij i0 uj xh wk qz ns ej un oi yi rh od ha tl re tc o1 uh ac ip qj we qh lq eb w3 w4 ia oz eu ri uz ep
+\n fr ij dl qk z3 qz wt z9 gq mr wo zz rd dd rz ee sw pp g0 sy vg ww iu pz uo cb t9 ld qr ei yx rw es ts zi wp wd gw wj hf r4 tt x8 wl t6 hc gp eb aj ai iu o2 nh qv ey kg dp wq f5 rt cg yw sr tb gj rb fm ro ah ig
+\n ss ux q4 ji xa mj mi ld rl pj r4 rx yg ti ix a9 ig gj j1 ww ii qe j3 mz vl qq ye m8 b8 yl qp ik ki eg uq fi ok fb oq fm sf oe hp v4 nk wg mx kt vs j8 yn wc wj ot td wn iw os u4 tl u5 rq de io x9 dl ql n2 ji wb if hx
+\n iy jk ql q4 wt kz fb q7 vy w8 ur ax uf ym yl py ou dl pm in sq ho j1 qr ls j6 ic sl ko xq jm qe qr qt yc es ry pf he i8 s3 pj tm oe qo lk wp j9 nb yd bu rs e5 yi ar rh ga ud al hh oe wx s6 do kv be pm w5 fn ey du do pu
+\n po rd qs il rf uq of jf nf ih w8 b0 ur pl us ed tz tu ef rb sj tf ff i1 pa dv ue fk m2 qr wt j5 c0 vw xo b7 p7 qt zi wo gt qa oo qd bt nt zq x8 ou e5 u2 fj s0 yd sw re cd qx jp wc ja ga jt bh hm eq rv hx gx
+\n iy ij pa gy qd qg he d2 qk d4 qz q5 nw wu am qm ft w7 rq yn ef oe pv ek fq sk y4 ts am fd qx q9 jf ju wy lm zw wp er sy qu oa ta jl ss gq fe wa p2 kq ws 2w dn xz t2 ej rp rs yy e6 iq ag e0 u5 tx dd pq fv jr qj ku oj ql wz fd s5 nj qx zt 2d qb nx pm ce f9 w3 er tw rp aj it
+\n ss qa gm dm qk c1 jd k0 t8 mk rk tk om yn tz r7 px ac av ot ts if fw ez y6 se qb ha su qn cl wx we qr zt kr mj rc qo es tj ym iz tk i6 fa i7 s3 pl jx du qu j0 ws v2 wj ys yv wd s4 wf nm dt wv ub ez ta wx e0 fz al ap qg wr ar wb fj n4 cz qg wq fc mp yq ev se eu am gx dp te
+\n a2 hv yb nv h7 jt lw xt lu wp yr rg 2o 93 uo pr ej ez jb lz ww az oq bk b5 wi qw rq hs te ea es ed fu ti uz fd tm jc do qi j9 zs j0 wd xv iz hr wx el ns oi t8 sc t9 sb fk hg cd rb mz wn 4i ov ln yr oz tm ro o8
+\n iy pi jt kz st tm rh ya b2 om ef eh tp el in sc qc g0 ps zq nu pq j3 oe a7 ja js ng tc qe pp eo em fc s3 hh i9 jl qy i8 lk wa ae 1p vh ox rk em hf dd jt rn tq is oc o6 so pi
+\n qs gm qn gw qb cx w8 ur yp up uy ek ez ar sy qb hd bx rl qt yi nw tt eb fl eh pg oh ib qy qi bp jz lf eo ph wh oy y5 om az tc ab wv wb kg ww eq ok aa uy w6 ag ig pi
+\n ra a5 db co qn d4 bu qz kx me nr q8 my lp t8 gu rk yn et y1 ej g7 yq d0 j6 b6 qq rn kw ei yc uq e2 s3 oj s6 jl kf rl ny wg mw t2 co el yy ez eb e0 al qg km k3 n2 zr tk qb n5 n7 et tm ul
+\n po uv a2 o0 rd hq dh hw a8 d4 wi z2 vt ww kb d7 pv tq fa ta dl oi y6 im ff ae qv sy si wq pq bn b3 lm b5 wi ku qu ru ul ri tx fb ss or sk wp qd w7 kh nn es hy wh rp um sx e6 rh rk pn sq rr hm dt ip dh pt wl h8 qc vj ly bq zn gd wn q4 hj yq xb mf ok tm ge
+\n ub pa fy qf dh qj q4 wt mw cm k5 gw kb el w7 w8 mx ya ii dj dk gd dc gh st qb iu jk qr bz vz ab b5 mf pu qe xd nq eo yb pd i6 ue dq e1 qo wo sp 1o n1 4v at qf fi of xj rj dq ew nm x5 wh na ub e5 um sb ob em pb re ip x9 h7 zv xm bw 1v mp zr w3 xm ee yg rv rt ia is ro ep
+\n cm bp hc rx y4 sr q9 jj rt qo uk ev to ff so bg eg y4 l0 go os ay tx qh hl qc wb
+\n z3 nn o9 xf fs gd g8 ns ec p0 tb wf uv iw jt wr dq bj u7 e2
+\n da td ta tw tf tt ay dq sf gi ae rl e1 gk af dp
+\n un fi dx wt m5 vo ys j3 i5 ad nr wj mn tg ox bs ia
+\n hv yv qa qf dg qj do ek w0 is sl ez sr i2 ww we rl vr qw y9 tu p5 uc hj i6 ud ws l5 qf xh kh lg wf wj uv tf t7 e7 dt qz ka xn cx xe fn it
+\n iy yv rs qg uw oh q3 lr vq bz ab zm wa ds b1 w9 rl rz uy wy om uo ef fo py tw fe qx i2 qc qb qn ww vg ke wr j5 j6 oy qq ng sl qw mj yh xf yk xg qu te p2 ft y9 uk ym uq so fx ff fn qy du f1 na lh wo qo ge sk v1 wg mc dq wj iv 1h pk 3s ej oy ek td ex ae yi t9 go e8 rk tz ud rq ax hz dk qz kf wm yq cy w4 h6 rt ry tn r1 gz ux pi
+\n d2 we aa cb o3 xi tu ti gd wq pl xg wc lm de e4 sj hc ic wc ra wc go o1 dd ip wl in wx js tv rx yi yc
+\n ty um hq co ux ql q6 wi bc kn q0 r7 yz ib pm g7 po qv re we bh 8j ru xo ra eu ud qy uh ec ty ry yn hw sm e1 pg p0 dr qy oe lc x1 kt xz pl t4 el t5 ex sn us dq rq ao f8 pr md ql v7 v0 n7 kh vn wm u7 cz et w6 gl yo ei di tw
+\n ub jg ph q1 q2 d4 q4 qz kl ld cn ji z9 ro ek gb w7 rh pk ea ax yj pv ot yl an sl y5 po im i1 zb fj qb i4 gl xq si m1 jj lb l2 ul sn ue s1 ta hg zu lh nd j9 ci qu wd bh ef ro ra aq t7 ex t8 od en fz fc df h7 qz n1 v9 zy 4a tc bb ea yw mf ia yp eo aj rp ob
+\n o9 qa h9 dn vo a7 qj jt ji ne kc cj zh wo q0 w7 e5 ui vi ya wy c2 r6 ui px yh y2 to pr ab dj pn a9 pm tw sq ig hi bg ni ry lv wy ic bc li qe im dv rq xy ki i5 fa fv uu af do vv za l3 bi kd nx w8 nt lh hu ra ub un ec rj ua fk ir s0 f4 uf dw jt k2 kz ml cl km wv vv es tb yj w5 rn yo af ru ah ig
+\n vg wr zh wo on ew ef ae ha id uf eg p9 ef gi al ng 16 rz o3
+\n qs jw qh cv z4 ok wt k8 kg km wa uu us sj oy iv tw jm c9 fo nd 20 qw w3 yi re qo yv rr op qp ue oh s3 uu px jc hx wf v2 br ep wg pz t7 t9 au re zj kn xc bo kg 1v hb wv tt u9 gj yu ry iw dp o8
+\n qd wi ij rh ef fe jm kw xj wh uk ef ti e2 j8 ou xo ny wh rp wj ub s7 pb nb qv ev o6 o7 yx
+\n gr dc ft qs gm qd dn k6 lo k9 nb as zg bz lw ui ee g4 dl qv q9 lu jg rx w4 yj ep oo sv uq hq yw ao fc e3 ui dy du sk gc gy qs l7 kz ed ej wl un yu wm oc gq qk qc ks tk ti eq em ly vl er ry sy yo ro eo
+\n lq d7 i4 7w y0 qt gw ch o6 eo
+\n fr hb dc o0 yb hn gi jh sw kj we o1 vg nm q8 bz zk bf ml ev ed r8 iv ht fg th qv vz d3 ng xj 0h 42 ew vt yg qr qt ha qu hs qp ij yn eg of tl p8 fz oh iv jl ss dy zu or sk uj co kt rp wb wx fg ev t9 rj yp u5 us ys ak rw al io kc dt jr hl ln wl wz gy wy qv qb mu hd ky ku zp ww yw rl oh ee w4 yz
+\n fu dg qf pg jg o1 dc by q4 st t3 lj ve jr am 2i rz ea lh pl ed pz y4 g8 i2 db g0 fj q9 qn bl en hr m8 qw rn qt yi ei yk qu xi uh fy yn ix uy gn jx f2 gr fi x2 zo pl vh ek sz u1 s7 ya em u5 da re f7 hl qh ju oz ar zb ci tk ob n7 vh og w1 ok er o5 ri ro tw rp it
+\n gv ra fr ub h0 hm pf qj kk zf zh rj eq d7 oe eh ib oi gg i4 jd ph nu gc qw rr m3 vj ry is dk qi rm qy qu ep p3 ed pd ta s3 tc fd sa im ow jc oe qi j0 gt bm vm zf nj rg w7 x2 nr wf hi rp wk co t6 t7 e6 ag eb u3 e9 f4 om o2 dk h4 gq jo cr oz ka kx rn wn do ep wb vn ef rz ew yi r2 ro so ob
+\n ft a4 qs pq iz pd u5 cs q3 qz ra rh w7 rk mv kv ee y1 to dj sj ta pn oi tf i2 th q0 vx vf ww 2l cb wt yq ku ye gs qe w4 qy qi xi tt es qp ed ef ti i7 tc pl jz ho zo qi za fy zy rk x2 r3 ht yv ex op ae iq u2 ag pb of dd h4 lq wx cy cu zy wm ry ef dj vx st ia ey te
+\n rs al qd uq ga qj sw we pa bi ba e4 yy mo d6 er et ti rb py ek am ib fe y7 fh jv mn qe qr oe c0 l1 qi mh 44 xe ei ev hq ix e1 pg pj ui hp pm fr qs kd nk 1v wj fa wf yt t5 vp ex wx fh pn ug fc pq io gh dg oy nf v6 bt jo qz gu me wm n7 br tx mt q1 su eu di uz am if uc aj
+\n da a6 q1 ph uv oj ji mp t5 mi rj cf jl w0 pk ew ii rv oe r9 ic id sl se su q9 vd we j3 ac d9 yw ew w3 y0 tk ao hr in e4 hu du qu jb wp cr qs v9 p5 vi xm kf s1 ea t2 wh y1 co iq yo au iy on ds fx yf qa zv qv f1 y8 wm u8 rc o3
+\n iu r5 el dz rt m9 hb lc x2 zp aw uz
+\n k0 px qe qr i2 yz qo ap t1 ou n4
+\n qg q1 wr wt wu 5x ij rg lq eg ia r9 is dl aw g9 xx w2 qt au i7 us jc f2 ge qa gt l7 lb mc x3 3p tz u6 kx f8 fb ku ag hd oj o3 fn tw
+\n ds rs k5 go qg ga qj gs by q3 xy q6 k5 4k o8 ws td mo w8 th ys eq pk yf r5 uo rb r9 td y8 tg ho qn gz li m0 oq kw qr g1 wy iv b7 vt qr qu ti to ta ut sa i0 pl oq sd ho qa gy qq l4 ks fu wg qg kj eh ez yu tf s7 os s9 ya em pq tc fv qg ve sx af ci ah qj bj df ry rl wm zy tv ol ey ox ri ie tq ir yc
+\n ak ra yb ds gt fy qh d3 ql jk jl ni zs q5 zf lf so wo mu yt wa w8 kl ue e7 2d mb yn tu ac pv id pm sq sw jo dv jd jg qq qw qe wr j5 wu 1h b6 vr yf cx lz rn ho gh qi es ev ty p7 fx fs s5 pl sf lh sh i8 qa xs 1o kq zg qh wk fs vo wl ez iq uo tj u3 gs ii je jr hk ql xx 1j v8 nz kf vz ww yw yt w4 rb ol o4 rn ux ig sp gc
+\n yv fr qa rd gm ps jd a8 qh ls vg q5 lg eh z0 vt mi vy rg lp ex ew d6 yg rv oe fs sz g6 sy ha cx qq wy j6 dk hr l1 qe gl ex ln uk sv ty at ru uc ts hi hl lg jv qi vc m0 fy xg qg eo hf mu mo kz ot np oy na el yy wz fh gp up ir e9 s9 f4 gf pw uh uj jr ab qh uc wl ce qz h8 v9 wv ie 37 eu gf yv 1m ma yw wm oh dk sr oc ei o8
+\n qn cd zf y4 oi dv xq q0 lc av cw ki xd lx qi gn bh em uf we ja ox iw qb wn my zs y9 ux
+\n qd qf we ls lf k4 eg bc e5 rl ea r4 oq er ip g2 yl ot iv ps gx qr wy xj vz xl bx 3o qr eu qi uj p7 uc ph in pk qt i4 gq wp v6 kw kd xk zw 11 yj wj rd oz th yo eb ya tl au tx qj wl dz wz cg zv qa rb wm 7a zs vj yw ee eo
+\n jd go qg d2 ji qn wa bf t8 ys eq ui d6 ed yn r7 is qb q9 lp lz qe c0 wu tx wa te qp 64 uq in qt qy wp j0 lz l5 og ca sz un ec rh pb pw h2 kv aw wy qf 16 rw ew tb aj
+\n a2 gr qs fu db qn q1 uc jr qk cn q6 b2 ne lg q7 q8 wi wp b1 ec rk yj pc fo iv sk gk jb qm zw m1 wx zt xy wy em 41 ee gh xg cn yv qp sn od ao pj fs ut s5 tb ad jc j9 xa uj ws kf wg vp nv fa wk mq x6 vh wv t4 ex iq 7r y6 sv ox ev eb rj rk em aj pq gh f8 th os sb mt ak q1 xr yw ti ee tb as ox o5 yo gx uc
+\n qj lp z0 aj wp vr wa bb xt w9 ya on ew ym ia ix pt tw dz jo ae cc qe lc qr cn b3 c0 ib ml qi uj qp pf p8 e1 s3 tn ui sg pn i8 hb ij qw pd ld fo ap ty ro 3b r0 sz ie gp rj e9 fk gd pw rr uj cf qz zr rq 4p kp pr vj w5 iq ey rn ie eo ir pi
+\n gr rs gy pw qd ga jj z3 kj ql nn bg dm zz uy pl e0 lh ef oe am y5 fd qx hi uw i4 q9 hs jb vd cx ni qw wx zt qr d0 wi 43 w3 cc b9 qa rw oa ev ry p4 en tk ti yq pd i5 og ic ye so tc de pj ff hl oe sj qs wf v9 xn gm wg xm 1f ph dr vg wk ns t6 um oa e8 sb t0 gs sm fx o1 de h1 uk qh zj zk ng ct gp nx xe 3z wm rz yk tn ro
+\n qa pt k7 og kl wy rp hx wp wa ui mx eb 95 ac eg dj yz aq in ih i2 q0 cz cb dg xi cq jc qe qt es ed sb en iz fp ta fc tv tn gw ka i0 lz sd il qf 1s iz qf nc xj xk ep r7 rp gu t7 wc t0 en tl iy iu pw kn km ql ct qp ch fl wm n6 rw eo qm vx ty ee ru ig
+\n um rf qd db qf od d1 mb u0 le xu wy q6 mt bc qw cm uu us r5 uf or tq ek sx i1 it la cb ax t0 wu ab 1t qq g6 ko g7 mk qr ey ha ea qp y0 en ue tv ho i6 i8 sp xs qf v9 jl kt rk qy ot 14 na ub aq op yo en tk ob on tx f0 qk jp vi iw tj x9 zi n6 wo wb se aa ag oc gx
+\n iy ub gy pt pd qf me xp w7 rj tk r4 rx ui ii r7 us pb pt g5 fw gf dm wi w1 eu re tq oo es pd ri tl og s2 fx ap ok i4 di lh f2 1i vm cp bh wj wx of on tx dt h2 hl qk wq qz lr tl f3 ce kp yr yg ro yx
+\n k6 qf cp la wp gv es pl uo eg am tf y7 i3 hd jk we d7 rl b8 gg ug es rt p5 eg em tz ow 3y eo wg t1 lc wk ol tj en ak fc f6 df gt ol qc rn tz wv rx di ov
+\n pp qd iz qm vk jg r4 pl ym y7 sc qn jf qy rq p8 yr di qu hb wd rf ks gw qg s1 x7 ec ae iw eb ai sq v8 h8 le ea vh yw yp
+\n ik a7 cp sq q1 lq ql wa qz lr zh rp ra gb w9 ys ui ym px up r9 pr ek qv qb hs bg wt ku pu dc p1 qo ik uk y9 y0 en hr tx ts pk jl ce lj l5 p6 v4 wk nu vg oy aq aw rg os az uj kc py ql oj qc pc fj jr bf cx es vn q4 y0 og w2 ue u8 is ag ie yc
+\n rs dd ik k5 hm dg k7 go q1 qk wt q7 wi ws t6 k0 go ii ee io ym ey sl sz sw jg si d3 qq qw nh lp cc kw xt m3 ip ln nf zm qq tc ex ry at iz p7 ux of he og dq e1 i7 pj sp s4 ok qt gt sd xf ow qr pd hd wj qh x3 yb lx wx um e6 t8 s7 uo u2 it sw pm rr qg h3 aq ze h8 ks zb kb bh ec wb vb w2 oj af
+\n ak ds dh jg cp ws q5 nq wy su q7 kb o7 ys sf et r9 ta sq y6 dn sy cx na j6 jc qi qw qe qr rb tn 3g eo uh tr ft ri uw of i5 ue ta fs s3 uy as ss qu ns lj wp zf wg sm x1 ix mc va mi rx ej yy y4 t7 ex u1 y6 u3 up en au ds ap kv qh kn gw k1 zv eu lu kh tx qk dr dh wm ti h5 o4 w6 yk af fq so aj
+\n uv sa hb ps q4 as wi ej qm zc yd yn fp y3 td hy ue qw qy es tu uq tx e3 jz ud sv l6 fu xh dq wk wx yi dj qz v0 qd ga mp wm yy tn fn yx
+\n qh qz ar qq ma kq rx qa st ei
+\n dc df il he c1 jt qn yd yn pe et pn pi d7 ke g2 j6 rl sk ng z0 m8 mh qw j1 eu qu rr es ec uk ev ul pf e4 sg jv m9 qf vd wk gu rh e9 f3 on qv vj dh aa ru ux yx o8 a1
+\n ra qs h0 qh bf q3 dv bl mr if ws df ev b2 pl om tz ax yk ta y7 aw dn zr ax qt m5 xx wp qy qi qo at ti p7 tv i0 fm qu sh so lk qp hb p5 xk ib vd hk t2 np ek yt um u1 ir sm yf ug az qj v5 wr fg zv af qv ck ay cs ww pq wn w1 yh as yk ei
+\n tr yb df um qf iz k7 q3 we cb cj ne zg a2 e6 ya r3 ut on rq io ow qx ja qv cx cv bh vj qr lv pc 3a rm ep uk ed ev au p8 so fx p0 ts e4 fb hj qt dy px sf f1 zo vx qa wa sa qs vm wf xg kf fz r3 bu t1 tu ez t7 va e6 fl tz uf gg io qg qj h5 zz nh qz zt et ba lu tq vz xe bb md u7 oh 5k rv rt tb yu tn ah
+\n gr da ty qj by we ls av kc qc wi wo xr mx cm yg oe xs pr ua pt dk oy hp qm qq zw vk xi ln he rx ko dz yt qe tv eu qi yz tt y9 ev ry ym ay uq pg oj aa s4 sg hp f1 qu wp qa bn vi os iz hw kt t2 rs wl r0 ez rf pv hs om dd f8 uj dj pt dk km k1 qz qx wc n3 nl wv qn zo vx ww dr yr oj r1 tq
+\n ra jg jr ao c1 wh rj fp gz iy lo gc dh qw qr 8p eo ev fu tl i5 uy uu ui qp mb hk yt ou aq oi e9 ip dt k2 qx vb mf id
+\n rd h0 qn ql la vg qz lw q8 ra sp ts pr av qc vs vg ku am z0 lo ry ev eh i8 aa pl dt du i4 zs w7 wj xl yg yh ra ex u4 pn lw gu pc on n9 n0 wm em tn
+\n gb ik rd ql xi bd yr e3 qq w7 ex rz on ui yg ax fo pv ab ta jp qw xi wi qw qe fh mz eo gk qu uj ed ev en fo ux ye fv jv ws lx kr kf n5 qj ea s4 vh ez um tj ir od ga tk f5 dh uk pr pt in v9 js sv qb zn wb vl zj wm ca mu zs ef rl yw u8 er id uz ah
+\n iy ij ub qs lo ql jk dv h0 wy cm q7 wu eh fq w8 hm w9 mv yd rz rx rv r9 eh pr ek dk el hi qc sy i4 qq lp jj we m2 g2 fo j5 wy m6 ve tx yg w3 rv rn rq qy hs tt y9 ry ym eh to e1 ur ff hk do wa kq jw p7 yp ky r2 wx oy uv ra yt t6 yy sz t7 wc s0 of ds om kx ng ql qz vj wt wb ly wm lw dh md ew w3 tv er as yu an gz si ro do
+\n o9 k7 q2 dt 1i wa uu t8 ut mv ef uo g3 gj hp jn nt cm rj ms wi b6 im qu eo yc ex qp eg sf do sh i8 ih qa wa wf kd yo xj ql wf ek un wx t7 s8 rj f9 qh qk k1 lq h7 in nj um bu qv ov n7 bh bn 3z w1 yt et o4 gl
+\n a2 gt rs ty rd rf qd qn jf qh k8 q1 qj ql d4 cg wt q5 z7 lr wf wu q7 sd yg yh g1 eg to el ih sw tf fg qx dv q0 wq qq uu px vl xi js jd ze la ud qy rr ky ft i5 em p8 p9 i8 hg im as jz tn qo ul wg 8d vs ap mq x6 no t3 ub wl tf iw rh ox ua pv ir us pb tx pw dg h1 uk ux cr sz ko wx jw vl rc tv af du ei
+\n gm we cm jx lf vq vw kb wk e3 df r3 r4 ew yf ti id fe fr su xr sl jg rq rw uq tp ss qy ws od nv wg ro t7 ar th ak da yf sw io jt cq v9 kb iy u9
+\n qd ga q1 h8 xt um wt nq wy wp a2 rg w7 hm cf tj ut r3 ch oe r8 pa qb jb zw mm wq pl m2 wr wy mh hi ei qy nw uf yv s1 fx ut sa tb ss hl qu qi zp nf zd ar l5 5h gm vo ix xk wk wf vf el r0 sx e6 uo rj f3 em dd uh qj cf wz n9 ga tc qk mu rt ye w4 o4 ad ag
+\n qa jr kz c3 c6 vp e0 ng wu ug ty uk tu to hr sp ud m0 ar pa qf wf kr fi ya kk wl xs ed mp x6 ub gu fh rj e9 ya om wl vj ha ex y0 id
+\n qm q2 oh cd q7 kk ld ys yd rv yk id wt qy iz ri fi i5 ic e1 ht 5z iq ha ai sq pn al gh un kt wq mi dr ax u8 u9 gk ru ov hc ep
+\n iy sa un h9 rf fi he uc u6 cd q6 wu zl zz rk lf yd rx d7 ef er rb d9 r9 im hu zv ps qb jf qm m8 qq ji g2 kt qq ew la xy qo es ft ik tl ye ur as tb m9 i8 qa ka qs bm zg ix ya kl t1 wj r9 oi um aw yo ie ys yf hg gq nh zc sb nw qf xm bc xr bj es rx w3 yj iq tm di gx o7 pi aj sp
+\n il qf pd k6 h0 na is q8 4p zl jl z5 hm ec io sf dk if gd qw 1a ld lf qr yx re tq y9 pd iz yw sa wp bn jq w6 v3 x2 br ta yi ha en o1 io ip pr kp nl lt kd eu kf kn n8 zs rx ux
+\n ih db gm jd wr zj xp vp qb c8 pc g7 uf uz p7 sh or xh xm wh mt no fh dh wv tk li qm vb ms if
+\n he ql wi bn c1 rc ip ia av or y8 mx yr dx ex gz 1p ic wf aj kn 51 bj wn o6
+\n hb ty dv gu ps qj ls qz ch q8 zh xp bs vt rh oe ot pb y5 y6 fr ih sc q0 re zx lm id xp yy qr ry ay p6 he dq s4 ff qt sd vx jb qo qp gb ws wd sd co fp kg s1 nm rp cu 8l y2 tf ev sn au us fz hj qg wc u4 au qh wv bn eq r1
+\n uw vr eq rx et rb fa ek id qx ui kr wn uf p4 tl au hw tx im sf yd dz bo wb xw
+\n uv yb ik qd gm gp k8 qk ao z6 ps mw zf jc eg a1 wa 7c zz rh yi lf pl r7 yh d8 g2 r0 tq su cz pl qe qe wr wv ku ho qt yv uj ij es ec ik yn ym uw tl sm he p8 fa ho wo gy ws zf bw nb 5q ql t1 ro rp ej xg uv el l8 rd wz rg go rh sv fh ya it pn hd ao az tc dr ac dy ot sj nd qz ok um ol sx xb wb wi n8 ji rz yr sr h6 et o3 ru rm pi
+\n rs fi ag c3 lw ys ef sg qu qi uq eh e4 gy qt ya ro hx oa f5 1j qa cl wq rl yh pu
+\n ub rd qd fi jl zk oq r8 y1 tp sl i2 qn sd cq 6d mj w3 p6 ta fm bo nv qi wh yj e0 ao uh kn h6 r2
+\n pa q1 fm c4 ig ex 2a yi mx ek ez dv jf qw qe 4s xt ld dh qq mg qr yc eh s4 hj yy s9 pv rr uj or qj cd wc ly x0 wv hh ye ew yh rb yk o5 tm
+\n pp q3 mw rd up td j2 lv af ih hb ee xh yy ua ug aa tb
+\n sa tr ds az qd fi dn hw qg dh qh nt z3 qz ad q7 q8 tf vu ue mx vp lg tz er yj to hy fr sw th qn hf gx jj pz wt lb cm m7 wi b7 vr lo yl qi ry ef sn uq ri fx oh i3 sd i4 ho vb wa qa ik uk ar hw l8 ya cw s4 wg r7 ot wk gu u1 fh th rk en sm u5 iy iu re pr hk qg kn gq cf h8 nj ct gp wb qg hj wm cy ok er tv u0 sy fq o6 gx eo sp ob
+\n yv ak ra co wy zj e7 ew tl fo ek ez im q0 jm bj lc tc rm ec ou bn sd os x2 lh wj ot oi y6 e6 yp ob sq p0 js qh el bg rr rc xw pu
+\n ih um k9 q4 ls jx ej om sf uh dz oi qx cl zm qw qr zc qe i2 i6 uu qp wp ws qd sd fj mx qk yn wj ub gu ar pn rr qg ln dl al vg mf w6
+\n tr ub ds jd gp qk jk d4 kv xo ws gi yo sj sl el tw i3 ow qe zx nx b4 qq ee eu uf uh ex p2 rr ea ry ef eb y0 en ri eh e1 oh fx fv sj jn xf qd vi l7 wg x7 r9 uv ek yt ns aw sx sc vf tk ud ds o2 pr kv ab gt v6 un qz wr wv rb os ie u4 rm zm rw n8 vc za q3 zu yy o3 yi ag pu
+\n ij a4 uj gg jy dt 1w rj a6 r3 ii pe r0 ej ta ts ff i2 ho ov wq kw qt ot m7 qq xv ei lv kr yx yb ri fa ur de pj hi si jq wg r4 x5 hj oy 5i u3 tx tc nd v6 oz wc qv bz qb qj zl dg ed ka vh w3 yt ey w6
+\n o9 ft az ps hq uq a8 ql we wg z8 ye wk bf rs wa c6 dd ys rl wy om pe ix y3 g4 dz gf se tg pa va jn jj al qw sf ma j5 qy wu xo dc rn se eu xb nw qu qi p4 ef ru sm eh im ad gm jv pm zd g0 wg qf ai qz ym qc t8 op iw ox ay tc av k1 ko vj qb zo wq bg q2 n0 rl yt as rn uz
+\n o0 jq qf he qh 7k q7 kb wp z5 tl ew yg et or ez jp g9 jv gk lx vk vw qa qo ou qg kk xz rx ro wc oa us ip x0 ku hp jr o3
+\n pp dc gi fi qf ql by we la za un qz q4 jc zh wp kg o9 qm bm wt r6 rw eg ix yl tp am a0 aw i2 fh th xq gc eq xx yr qt xt nw qu ri uq tl ue pj p0 hk vx uf jm kq ws jl 1s p6 ca he x2 wk wd r5 wg bi hk ro wj t5 sx fh sv e9 ya aj e0 pn ao ug ac kn h4 gq nj wr cu qs kf vx xw k1 og yt u0 yk di gx dp
+\n ij tt ss dv a6 uz gp qk cv lq ql un kz gj wt 4y fq lh z0 6h w8 vp r5 ee tu sg pv y4 pu a0 tg q9 gx qq qw we la wx rr ls zy qu wi xz wi xp ew qe rn ei qo uj rt fy ik tr tk sn pf i5 tc sp s4 in gv i0 f1 si ks nl kw bq co mu eh oi ec sc wc u3 ga fk sm om kb wx bo zj hv y0 en og q6 er tv tb rb u0 w6 tm
+\n rd ik og q3 q5 cn xp c4 ig mi e0 rc ym id tq ou po gg qb sy ob hf pk xr qr up j6 ng xx b0 qt tm eo vw ux yw pg tc e4 gy p4 xv pd wg n5 r3 wk iv rl ht oy uv ub wc ar t9 ga s0 em pw x0 pt bw wm vv vm yg fn ad af do
+\n hb gg kl q5 t1 mi a4 b9 r4 ee up pr g8 gl q0 cc kr c9 vq yy wa qy mz ty yn yq ai og tx tn nd ws 1d ky x3 sz td gu t8 op gs tz de av sk cy zm be wv qk og uc
+\n ph qj d2 cd q3 q4 bi wp vt oq y1 ps cw kn gz ij p4 sp e4 wa sx nj v1 w7 me s7 e9 tc k1 lw zc vj wb kb tw a1 aj
+\n rd a5 hq qg qh q1 la cd kl mp k8 mj vy zz yu ut uo sg yz hi gk sy q0 m1 qw b5 wi dz qw rc aw eu zr uk ti em yr lk kw nz wg fx tt wg x7 rr lm ju th tt eq oc o6 yx ro o8 a1
+\n a2 dm wy ej rh rg pe a9 oi y7 zt vk ga yf pp fh ml tj p5 sn tk im jv v6 lb pf zq ty wh t5 go sv e8 it f7 ac p9 cu bw kg qk f7 w2 ee w5 tq ep
+\n vs rg rj 70 ys nq uf ex hh jn kg ep e0
+\n df q3 u9 4j bn rj hw up td i1 dc hi zb wv g3 l1 rz qt qy ty tj ef eg sm tx ap in px af mx r3 r6 t2 t4 rd uo e0 iy ii hh qg zc v8 qc ch px zy zi ye og er tm if
+\n gr pq se pp qe lq n1 cy qb wb ey
+\n gn q1 ji sc nh pe yh qt ss rw hf kx zm o6 gx
+\n ih hq ap bl wi wa 3y er pc eh r0 yl ta ts dl gg fg i3 hp kw lc ls rl ff wg qi uh uk yq yw ok as wp gc jm qf to yy fh e9 pn qk sz nm li q1 vb yq 2e rl tv sy di sp
+\n o0 yb rf k6 qm fv q5 wi rp fe dd mz rz ee oe ia yz am ig hp fj su gl li we m2 ls xj md z0 uh yx eb eh ur i8 qy oe i5 jv ce jb jm lx ci kg oy hx e5 u2 tj kv qh qo af pv tz az vg yz ri ge
+\n il z2 d2 cs ba wa 1o ys uy ed er d8 sh qx sr qb jd ov xq nu lv g1 ku pu rp qq ee et rm eu xh i5 p8 tx in i3 bn v3 ap r5 oy sl oo dr dg hj lm sk ff vu cu wb kd zi wv mu w3 yt ok rv ol ey yx ah
+\n hv qs qf qm ph o2 zg wa a2 rl pz ow uo y2 ey ix us d0 ek fg ww j4 wv a7 wu qq bv te tr uj tp ue ts do qo cy uk j5 ra ou aq iq ev tj u5 tx gg df dg h4 qj nc wz v7 im kv jt y8 rz w2 yy ei ge
+\n gt df jg og k0 lq kz zf iw kh gv cn ur ea eq yj ix id ez dc qc st wq c0 lm fa lj qq mj sw qe xb rq qo yn ru at e1 p9 s2 ts i8 s5 i3 sf jc xa hb qq gy wd p5 ow qf wj 1n wd qz yt ex e5 op at sm ud tz yf tc ax f7 dg qg ve sl qv tl wn 2h ky yv f8 rt o8
+\n yv dh kj jl wr bi kc 4j nt c5 lo z3 e8 on or g3 g8 uq sr qm hd ll c0 nh ws qu ug ph tc dy oq qo nj wn t7 ox zj qj km h6 ql zz qz qc hn bk fw ob
+\n un qs fu k7 co je gp o1 bg dt vu rj mc rz rx r6 g3 ek qc uw fj li qw lx ku z9 br b6 mz yc ty tl yw yr e4 i3 ud ce i9 rf bq xh ep wf ej rp vu wl u1 os ay of pm ap gg dt hk ab ql lq qs lt nl u3 wq n7 bm ef xm yh w6 ru fq tw
+\n qn ql zj rg ed hs we qw re p7 p8 yr tv pc lh gx i8 wp lz cp fv lq 1b di
+\n rs q0 is q0 yq rl qq vr qa v3 tu in h7 zy u9 o7
+\n ak pw af z0 wa jh tf to ey r0 ta fe tf th wt m5 xu wu xo l1 b7 bv se w4 qi es dy ge vm l8 nv kh l0 j9 bi ez iw ux f0 gt zc 6w bf cx u8 w6 yo o6 fw
+\n gb py gp pg ql um lf bx 6q ra w7 vu rj ui pj tl ii y1 r8 ac eg yl sj tp y3 py im tg zv ll ip wt av fq qw dc yu ei uf qu tw y9 em tl dq fx hk oq zu jx hb i9 bq iz mv wd qi pz lx ek aw fg ag u4 ir tk of pb az f7 qk wz le qs wy wn dd bj mp yq zb cj rc tm yo
+\n o0 pa rd qf dn qg nm ji q6 cm wp ec uu ax r8 us aq se lt g1 bl vz jc mj 6h ul en yw e1 ye tc i0 do ge gn v2 r1 7f ed x8 rf oo t8 s9 u5 pb fc ug ab pt uc ce qz h8 u3 ga fl bc yq iq iw id o6 r2 te
+\n da qf qn nm wr jw c2 ig rj vi ys pl ed ii ax y1 ua r9 ia ab tq an ek dl sl jo g8 qv gl hs jf d5 c7 kt ix wy wi lm qq dz ko qe ff pp qt eu wd yx ec p3 rt ty ik p6 pg i6 e3 sa fd dr gb jl as hz qo j9 j0 qs qd wf p7 br wd x5 hh t2 wl el rf tg ar rj tk em ud pn rw av iq nl qv x8 ov wq wb ec vm y0 w2 ew 5k w6 fw a1
+\n a4 a5 z1 hr qk q3 mq zf qc wu q8 bd a3 xf es yn rc et d8 pr or yl fa se dv dn uy vz wu en mb qa tj uq i6 pl du jx f1 xs qs qd vm kd wh kh mv ed rp ra co tg oa u2 ie ha ir da pr cq uv oy qb qb eo ye rz rc ei o6 id if do yc
+\n a3 qs w0 po gx xo nh uj e1 lz os wj uv ud tx rr gq ve ch cs xz
+\n rs rd qs qd hw dh q1 ql dy mj zz ws vy pj ea mv yf om uf sg up ix pb ab ej tq fr tg i3 nq dn gc wx wc qr wr lv t0 cm wy wu ko qe qr ze ug oa ed ym p5 os uq i5 tp pg s1 ok tb fb vx ns p1 wa qs ka qd lx ps sf zy kj pl ro rs rd wz tf ev gp e8 u3 ir od f5 dh qj cw qk h6 ql ad qb fj tl al zs h2 rl eq rx er w5 eu yz id pu hc te
+\n pa pw q0 j4 4f vq iv yu ry fa e4 kw xm wj t3 ff ye w1 oj rv ul ep
+\n mw di ec wt rx ko i4 qo l3 iq iw py yl
+\n dx uv yv qa a3 ij ps qh cg qc 2n mc ut eq rz ea kc pz sj dl in db fk su qm qe m2 we 1q ke bn xy wb yq qq vr qw xc gf tn re oo qo p8 iv e2 tc e3 e4 fv ff pl sd qy qi si pm ws aa zw bu hu fn yj pv hs gh o2 dy ln v5 qv zb tk bq 4p qj zz wv 8n h1 y9 wm yw og mf u7 tq ov sp
+\n iy wt gx nd t5 r0 yl tg dc qc th wt ld nd zn tc ke qu qo qp tk fa hh gn qp 5d qd ar rg oz fh at ag 2a kd nx w2 w5
+\n il wp ee ym tu ef dl el qb cl qn ob qe qr m6 mf xq i2 ud fn pc gw m9 l7 fs eh gi e7 fk ys pe ok 30 mf o8 uc
+\n db k5 a7 je kj q3 we wt q6 ie ck kg kn gr lf pj io us id in aq jo qx su hd qm li qq jm we lc wc ld d8 lb xj em rn j4 tq oo eg fl sm he ue so hg ok dt qt px j7 qi nz l9 lj x8 wj na wk ez ex rg e7 u5 h4 kn ad cu oa qd do q4 eq w3 yy sy su r1 ri if yc uc
+\n fr ij ft db dn gp qh ph ga gg kl ws wt ab q8 lk wp mi w9 tj pk yf ew us yz id gd y7 dc qx hp qv gl jn q0 d4 qw re qe cn cq pt wu lm dk cz yy qr rq te qi rr ea ex ye dq ok qt sg qa nk wh kt bt bd lh wf yb hu mw el rd rg sb em on qg wq oj zx wv n4 bo qf bf re wb ev yq cl yu w5 ad ag id a1 gc
+\n qd ga dt ej kn r5 ax sg ix av am pu g5 ez fe qm nu ii zt wr vm qt jd im rb ml yx yv ru hr gv ad dw x4 ub hx wz rd ol oo rg yf f6 ii uk dl wl yw yu ie id
+\n yv ga z3 kk pp nq le lr bp qc t1 c2 hc vt lw jl w9 ur r3 ys iu es rc ud yj r9 pb ot ta sk gj jm gx xg sf lc qr ht ve sl g7 qy nw vw fo ta tb oq fn qu hc qp op nz ow wk zo zq yg wg ke yt kr yy sb tk rk tz iu o1 rr pr qh dl dz p0 n2 ci cd vn ms aa yh ry w6 af du gc
+\n q1 q2 mw bz k6 xf ec r4 rx yg ed d8 pv is sj qc q9 zw vf gc cv d0 xx rn ex uj ij ts ff ge i8 zg 4m x5 vh oy yy y5 wc tg at fk ob fv f9 ql bo k4 zn be ww ea ry tv w4 ru ov
+\n tu g2 tq od pk tm fi y1 uf ku wn ew
+\n hn rd um qg qh ph aa zn bc gv w8 rj ea es ui r7 r8 ey fw gh jf qn nu cc la 3u bx ve po et ei eo ea qp ul i7 uu fb dt pz qt m0 zf l8 kl t1 ej wj oy rf th rk gs sm em ap hg o2 ub wx ka hd br q2 hg y8 2e eq tb an oc it ep
+\n tt um gi qh lp kl lw q4 q6 ro b1 if y6 qc th g0 q9 qm j2 we xt xi nd is ng bc ku yw sm ye dw e2 f1 lg qo wp zs gy l4 ul lv w9 br xl ql vf as yg y3 t5 wc ec iy hf ii f6 re hk qz jp oo qx xv v0 f2 vx cd vb yq ew zu yx
+\n lo dt mt z3 rg av us pa xq wq qe qt yx y9 ev ry tk hu oe oi r1 x7 wk wz td jy ww qc 15 ba hd mo 72
+\n rs wi vt rh we jl ur tz tw ht y8 fh i3 qb qm b3 qy ep op yn tu ay hw fd ug qp qd x7 rs yy td u1 t9 sm uh dz ql 4t rw kn wn rc eo
+\n a4 d3 kk q5 q6 wf wg m4 2b vt w7 ur uo rw pc g2 sl if y8 fj va rr ld wn xo qw rn ml la qu ep re rr qo pd eg og e1 i8 ui qt px i4 jc gq oe jj ws ks qd ul zh sm ql tf go e8 os tk rk ay us u6 dw dh pr qh qj oy lm jo cf ff wm br k1 en og aa rb yj o4 te
+\n gb dd rd qd a6 qj wt jx z7 xi q7 kv mv uy pk or yk ek el y5 ez aq dx qx th sy jn pe vl fp xz m8 ng jh kq qr la ei ft p9 oh hj tn ho ud xs wa jq vn il zg p7 fp ic qk wd bu e7 go hd sq ug hh ip sl ch qc px wb hs rq qh bk rl ef yw dj yt u9 st yi uz uc a1
+\n al tt pq um uq un z6 wa vu w8 ed sj r0 tq pu dz qx js 2j na ip vz lm qq qe yj ud ei wf qi te p1 tu il tk iv dy i0 xg sf ix gp fk ai qk lm ql ch fg qv vg yw rl yy rv rm pu uc
+\n rd ik qd jd ph gs k0 q3 qz ia q6 af q7 6w o8 tg gi sj ou aq po ja qn q0 qq gc nh xt wr fs m8 qw aq wp ho qu rm uh tr qp tj ay fi ao i8 aa i9 tv gv qu wo lj vv wa jm qa qd uk nl g0 qd dn gw ic kj nn wf wg dy ej rp y3 rs yy e6 wc oz fh eb e0 of hd yf uf ab gw qc ww yc cd ji y7 n0 wn cg u0 rn yp ie a1 dp o8
+\n ih h9 qd xy yl ez g9 lt qm on vg rz dx qa kt p1 ex yx od uz e3 in sd oi qa cq dr me e8 ua ya dk wx bw 4p vx tn o7 pu
+\n ij rf fu jt gs ld wu q7 wo 2u bd k8 rg ws gi oe yl if sq hi dn jn qw bb cb wt lm sq vr qe wd qi qo tk uc hp i0 pa w6 fo v4 1m x3 t7 u5 sq ai hg ap hm ju fl tz wv w4 ry tn yl fm gl ox
+\n ql db ch wu rl ih qc 25 pb qq ty yw fp ao qy sp p1 qa rf iu rw qg uk gq km dz wv at qb jo eq ur rb ad
+\n ra fy gm iz qk qz lf bp kc is nr ws mb es tl d6 up pe ey sj fq ek iv pn sx ly qb jd jn q0 lp m2 xl fs xx rs b8 hu rn ey qo yn vr yq ti ai e1 so ts jz du do oi sa i0 oq ow 3r 1a wg qg r4 yv ty r8 wk wl s7 u4 of e0 gd pm yf h1 qg n1 lt sv qb eu wi mt ez w2 yk su dp
+\n rd qf dz aa bo pd pm qw rn tf ah wu
+\n dv jd he jt ao ql zg eq pz oe dj fw sc fj nt io fi lr xu ns mx qe qt ei rq qy yw em e1 i8 sp in jv wo ci fa x6 fn ej wv oi um yi oc au yf mt te az yv yi ad yo yz
+\n uv hv ty qd nv jh qj q1 ql we fv vj q7 gw fe wl vu w8 vp 6k yd r7 yj ia ey ot g6 dx tf im hu ae qx fh g9 lt fk hs su ov lo m1 cn t9 wi ki qq qw xx aw nw es yb vw yn ry pd ix em so ut oj du f2 lj qp jm w5 xh ny wg 13 wc qc ek el sx oo th uo yo at pv hn uh hj qj zk ql lw zx qv cc wv en yr w3 yy st uy iq ox an ah
+\n rs pa df il iz qm k9 bu jz bi ji du lo ts yy xf cv gu mc ii rc up d8 ey pv av to yz y5 fd qx sc qv jv qr xt bk m6 ot md qq qt rq ex p2 yn sb xc fp pj qt ce wp i8 j0 wa qd gn ps qf be qt wh ky l9 za wg rp wn fh rh em vn dh vw ng wz k2 h8 f1 nz zh yz q5 zy e1 yh ad tw ep te
+\n db wr a1 pk ew uu r7 d0 pn fe g8 la qq b0 ef os oh pk fm wa wk yi ev ua sq wu n5 tw xc er
+\n fr yb qa hm d2 q2 o1 oj ox dm oc km kj r5 px rb et r9 y7 vs pj q0 4s lm gs qe eu ep ti tl of jq od pf nr nb ea ej yy rh u2 iu qh dk zl qv xe 2k vc vb zb xn yg gk ox tw
+\n hb dc rd fu gu zd js wu xa c5 a5 w9 vo r5 d0 g3 oy ib i3 ha jm nu rr wy m8 b9 ws qu ru eg uz hw eh hg sp sf do jb zp dp nz wj wv fg ae ah ob hd dd gh dz qz xc s5 vi je n4 re bh ma wm as st tn yl
+\n wi w7 om ug qa x4 yn r7 gi re n3 n7
+\n ij pa qd qg uw qm d4 z4 q8 t3 mu yp uf ia pm ez ih qn jm nu kw qr j6 qw bb yu qs rw wf re qi ru jz tm gw lh ce xa wa xs bn nl ys t1 wk r9 gu oc u4 hd ku hp yg rv as yz ro dp
+\n qa ss dc gu qk cs cv fv nm z7 lf z8 xs a3 rk r5 ys eg y1 po qv cl jf xq vh we wr qr b3 bv yr wf re qo sv tj ti eg dq ic fx jz du qp vb ws nl wd wj zq vg t2 zf wb tf yu ex yi yp tj en ua ud dr pe qk dl lq qz h7 ol v8 cy vi wv ck el gd q2 yw w1 ye xq w5 gk o6 ob
+\n k5 hr jk ju k3 jq q6 zg wi id bb wa gb rf rq g1 et ot pn ht dc ww c7 we c8 cm xo mg w1 1l yx tr p2 oo lm ry ao e2 i9 e4 hi tn di lv cp ca 2u t2 no ub ex rk ys pw qg av py ql qo lb pn eo wb er tb yk ie id r2 tw o8
+\n ra qa qd ph jh d2 dx d4 2z jl q5 ld cm wu wi 2t wa dd lg ui to id in uq ww rr g2 wu rl qe 1l qi qo ec yn ed uw p8 ut sj ig p4 zh xm p8 vs vg x7 ot cu l6 sx gu yp t0 gs az pe nf wl qz nj lr cy wr qv tj s7 u2 ly be br ym w2 af ri it ob
+\n pp uv nb bu kz wi ah z3 c6 rg la vi oe ia ot pm dv gk 2h xq kq xg bv qr b9 j2 ec od ay p8 qi wp zd ay kg ea mq 6b qc un fh tl u5 av ub ji zx k2 wc zy 1x kc ah rw vc wv yq zv e2 rp
+\n a2 iy sa ft pp un qn qz ol lf mg wo fr vu ya rk w0 pj pl el dz i3 jd su ob c8 pb id b9 ep yn ru tk s3 sh sj xa l3 wa nj ke kr ic xl bd ej rg yo f3 al f5 sw re uh h2 av cq bo vk kf bd mu wq wm ew ue tv ol tb o3 ul ov
+\n iy a5 gu q2 se ls dt zf o4 dm ez jj uu ik ue w0 ya ea on ui tu rv y1 et r9 tq y5 ht dc fg i2 vs q0 av iv ku in il en ri p7 uc e2 ut sp fv qt gn f2 wo qa op v7 ws l6 wh ys zq t2 wc y3 sx yi t9 t0 ys of rq ug o2 av kn h5 ju ji ko v0 nz wn kf te dw u8 yt fn r1 ie yc it
+\n qj lw ji eq oe g7 jf jc yr qo v7 p7 wd ma xg wz qb u7 w3
+\n un ol eh g5 px b8 rr og gn mx yf wv sl on jo uz
+\n qg qm q5 wy eg ri bm mz d5 rx pt ek fs pi td ez ho gh q0 ll pl kq wr d0 l1 qq ko er qt wf ei p2 ru uq ye tx s4 hc zd vn ps ix zo wk t4 y3 xh ez rf u3 up ys ou xx zv qa wb at rm eu qj wv za zs eq zy rv ry tn tq yc ob
+\n ss qa pp rd jf a7 lp h8 um kc q7 wl rg r3 w0 wy tl a0 ih ly qm qq m1 xg qr up ja b8 yh dc lx rw ep ea ev ay ux to p7 tp tb i3 qu gq do gr za l8 rj og oy ub e5 ae tg t0 sq tx hj ad gs it vc bh yb eb w2 yg u9 w5 fn iw si di ah hc
+\n ub o0 ik ps qd q1 ga lp cf kl m3 z7 q8 ue ee ud ix g5 ib gd aq fh th pa qc pg ue ur xw ww qr vj m5 jd ng in tx ff xc er qs eo qi p2 ky pd eg e1 yr ut ib oj tb hi hk ho gm qu qi hc ou gn wg sn wh ix wj wj wg ot ra wl un e5 rg s7 t0 oc sm tz hf fx pw x0 wz th qv 1z zt n4 qb cl wn xq xr y8 rt y9 rz tb iw vb
+\n qn lq bu eg iw wi 2u 3q t6 k0 yd sd ed rb eh ek yz if pu y6 in fr qc qm ob il ma b4 en wu dk nh b8 qe bb mj ws qt ho yl ug qi ea tw uh p5 eg tl yr i9 pl lh ce i7 wp qa xs dn 7p kr p7 eo vs mb pk ni yh ef rp wj ej y2 iq y6 u5 em e0 ii md jy lw nj fh bq pc xm km q2 wv k1 rx u7 ut et gj tb iw gx fw yx
+\n qa pw k6 qn qg qh as u0 dy q7 wp hv 4z 4c w0 d7 et aw wr bl mx md j6 an wi qt lc yx ec tj ri fx ht in gm ua qo qa ik ys eq n7 wh rs wz wx e7 eb ak gg ip sj py ka rl su ag
+\n gb uz q2 qz wr q4 z7 ia ad je am my vi zc mx ym r7 yk ua pt g6 hy y7 sx ih qx pa hp jd sy gk nh no qq yg rc 3s qy ep p2 yb oa tr eb p5 en ic yr dw tc in hk qt zp i8 lz ks ci lg x3 wd xa x5 zf yt y5 op u1 oa iw fh oc rk ay pb pw uh qg zj qh h5 nf cd nv qx kp qs qb 6q cl kh xe u7 ew tv sr as rt o4 ey tn is
+\n hv qj bo ru z9 t3 lj q9 rg vi rj sd r8 g2 sj yl aq fe po pa qv jf dm qq re we la wt wu qq vt gj ei yz rr xk uk pf so pk im zu ua sg j8 sk zd sd xz zw kl wk ol um yi rh sv u5 pb tz dl oi wz h7 s8 qf wn cx f4 mo wb ed oh ee er ry eu ei oc fw
+\n hn a7 cv q6 cj q8 fs jv rl qq qi od dt l5 co qr zq ex u2 ah on pr wx kp wb yh gx
+\n gv qg je zg jc q8 fr r6 yn ii g1 pe sj ta el jo sr jv ni jj zr bj ns qr qi ur hz vu wh cs ep s3 hu ez rh u2 t0 dw uj oi wx n5 18 bf wb yq oh ov
+\n gb dc um jr mn we bl t6 vi pj c4 d7 rb ia yz tf qn dm ke xb ft au ix tv xd qq xg rx x6 vg r8 wz op h3 qj qx lr xv qc kc wp lq ea rn rm ri eo yx
+\n ra gt qs dv ik gn co qg qm qj cb qz z6 wt ji q6 dy qc b4 ws ds vu cf on yg d8 eh py hu tg qc hp qn d3 wq c9 pv pr or qq ml eo ug tw es il os fi uw to em ic oj ho px wo qp m0 qa 1o ks 7o cp wh wk wg x5 ee yn bi ef wj ns r0 ez um u1 iw eb ir fk ov s0 fl hn h1 pr x0 ux cd wz aq jp im k4 qv bp wm n0 vm u7 w4 gj tm uz te a1
+\n gv il ps db he nb wr ql kc zh tf mp lw ab us pn a0 tg pa th ps hf wc 1a yw l1 fs eq wp qr rw yv tr eh so i0 qf wf l7 wg na ou ah ay f4 io ip f8 cd h7 nj rn wb qb qn wp oj w3 w6 di id pu eo
+\n hm fu pd qk bi wf q6 wu b2 q7 q8 oc lj c3 o7 6s a1 jh rg rj 2s z7 ya id ez fr gh vl cl zq hd jh xh ru c0 bz wu dl qw km kp b9 rn eu yc yc p4 ru tk ux fo ue p9 iv tv s5 do l4 cu rg w6 os fi 4b uz l7 ld l8 fx jb ee wx rp ek tg e8 uf de qh hz h4 qj gq nb wx qc sv go wm zi zo tc 3k ez ec rz ye oh ck w2 sy ia gk rm ei si dp
+\n gi go z5 qz wj mg kl yh g5 y6 g9 xt p6 eh ap sa qu dw j8 ql yg aw t7 ir zj v5 v7 ba tw yq cz gc
+\n ps qn z3 sw gs q4 ie gx ye wz r3 us ef d9 pb y6 tg y8 qb gc ww az c8 cb lv wy a9 qq qw l2 c8 qu uf yx qo ic de ut e4 uu tb fn oe dp wa uj bq sg mx lv v3 ya xk wd by n7 ra cp gu va yo u2 sv rk ir ya hf kc kp bo qb gp qb yc ku q3 dj o3 ey ad si o7 tw ge uc
+\n dx yv ij pw a8 qm ph k9 dz q1 q3 cn wo wp my el bb uo on eh id yz am fe hy sw ha m8 vg wt vl wm qq w3 ls gj yx eo ef en ta e3 i3 zu hl m0 wd co zy l9 nn ea yj e5 rg gi fg gp u4 ir tl tz pm dd kc p9 zx sx qc qv kf ln on qm lw vn vm ew yg se as is di ro gc
+\n al tt gu qf qj xo q8 c4 ws e5 ur vp ea rz g3 fw sx th db kq wt sv tb ad hv 1u gt ss xk wj qj pk rp e7 ha fk f6 dr rr hk dk nf qo lr ka ie fk cz yz q3 ym ks gl
+\n gb q1 qk we q3 q4 t1 ox di ny wa ws gi ea rx yg r6 io ow y1 d8 ey ab g3 is ek pu ez dx qx th i3 jv fk io xh wt oe kr nd md pb vz wi ro se b9 tr yb p4 i5 p7 ux fp p8 sp in ok hj qy hz wa qq zd qd 3t wj aa dy 5u el yi uo go t0 u5 tl dq gd rw uf gg kb ux dj qj go wb zm lu tx vc es ev ry zt w2 tp w5 tn o6
+\n ra ps hm qf qg 4q we ql q4 z6 d1 wp vt xs tg 2s e7 r3 ys oq ef c4 av dj pn aw sr th hf gx uy wr ac zv m6 wn ko c5 qt qy yl yc uh od ri uq fz dq i7 tx fc aa uu qt oe i5 ge ce wa gb vn xg wh og ya xk fs ea yf zw wh ub 8x th iw rj ah ya e9 tl yd tx yf ii fc kx hl zj or qj mh ww kf zm lb ob qn wp ww wn ym u7 rv ie pu
+\n uj by wo ml dl qx m3 8i 2y r1 u4 hj h6 qa xv rn rm
+\n qa gm qh ql u9 ls e3 yk fa ts wr vj ac en id ud ke ye i7 fn tn f1 ks at me l8 kl hk lx rp ek l6 ek oi e6 wc u4 2h pn 9y zq qk ec cf yq oj vz tb o4 iw ox gx te
+\n gt ub hn qd qf hq dh q1 lp qk by ql lq we wr sy wy lh z0 ge k9 w8 th vp pz yg ti fo tp r0 g4 yz ig sx im i1 jv qn q0 wq nu 5t pw wm id qe gg qt xg wh en uc tz pj e4 tv i0 ff qt gm dp jb qp cr gr qa nk ws os wf ne wd mm wf t1 vu wz t8 e7 t0 od hs dq df av km v5 kl we fg cx al ax yq y9 rx yh ul hc
+\n we cf q4 cj bf ws ww yd tk ef ek y8 qv fk wt ko qe ep rt ik ut op mr j0 ej t3 s8 ir pt qk km ww cg wc gi lu n6 yr rc oc
+\n ak ft gy rd hn a6 uq q2 q4 lr ia eg d1 eb on sj dj pn pp qv i4 hs gx ww xj m8 ko im rt fi tc uy tb pl qy pc uf kf kt mv l0 qj x7 oi um tf ap uk wl ql zb vj wv tk re y7 de q4 rc ad rm ul is fq yx r2
+\n dx gb he dl k9 z3 qk lf ad ch js o5 vq zl rj wr th r4 tu uo r8 fp ic g4 fs g6 im fr wq bg no wt dg ru ln rp wi yd qq xz ew i1 kq qt rq wg sb pj hk qu vx oi jm pa vi x5 wf ni ro ot oy di un y6 yo rh sb us tz ac f8 av ve h5 ji mj n2 ci rm yx ep rt 5f yq u9 rb hx aj
+\n ih a3 pi mt do w7 zc nh qe wv rs xc qr ts ut pj im hp xa lv x3 ph tt sc od rr qh km oc rq xl vv jp ef st tw
+\n ft ik a7 lp jz jx k4 hz wo bv q9 6t ur rl qx qv js dm fw wd re ea fd hu jc qu zo p3 lx pf wk vf fj 2o wx sb gs it ol yp di eo ro
+\n tr ih yv h9 k5 qj qk nb wy gk q8 c6 mj a5 yi ur rl uy eq up yj r8 xs sh a0 ez oi y8 ly lp lx fu il ke zy cj sk xq 7u ey p2 uh yv qp rt y9 eg pf so ph tx tc uy e4 tn j0 gy ik vm ul mt nm mm x6 wj rp eh sb u4 ov of tc jt pt qj jy k1 s5 qs 7z do zi cx wq wb ma wm uw sr w5 o3 o5 su r1 yx fe
+\n h9 gm cp z2 fb zd gk ve o7 mt bc wp bd rg w8 rk kx mv uu rb or yk eh r0 y5 ht pu tf se ar fj hp su m2 cb c9 c0 b3 ns qq qw rv gg ij y9 oa od of pg i7 hk do pm 2n sj wa vm zw vg yy om qx nj v9 f3 ee w5 w6 iw sp ep
+\n da jq iz z1 ls z5 cg nt zk 1i gt w7 yi r3 xi yn pc fa ta ez in i2 qc uw si qw d5 kw il b4 fa ib 6c ud rq yl wg tr qp p4 ry sm ut s5 hi qi do j7 jn j0 qs iz p7 wg aq ex go ax ku nc h8 n3 v0 oc ah wm li zp rl h5 is eu o6
+\n gr sa a4 ik dm gp q1 wy m5 fw a2 t6 rj mx e9 et ej q0 ot wu em fa mj qe yg gh j2 te tj p5 eg tk em ao i7 di lh ce m9 wa wf ys eq l1 t3 ej el tf t9 rk u4 ay rw gg dr hj ac qg zk h6 dz ok zy kc mr fz iu hk yj oz ey ag id r2 ov
+\n ds fu ps vo qf qn pf a8 ph q4 cb le cn h0 jg gy b9 rk r3 pj yd oe ht pu ig ez hu q0 qm nu ww qw ow xh b4 is a0 qq hu bb cn xh oo qo y0 fz ue e2 yr fa pj in sa hj ui sp nj zg wj ge wk xg ra ex vs oz eb pv tl iu x0 ln cq xx iq s7 wv qs zn tl wn lr yr r2
+\n az qf fi qn u5 we jq zh wh c4 sd is y5 po oq ki sz qe rm qy yv p4 ye tb ho 1u gq r3 pl td ov u4 hg ax zj wz wb vl vv se 5k eo
+\n tt gi pd jk lq q6 wi gx mj ut ax av g7 qv zm lo 5y dh cw xo ve vy xg yv iv i0 qq xg jv l2 yi sc ga pv pn iu ug gy k3 cl oj tv yl di
+\n qk se pd gt rz uu d6 io d7 tq gf em ym tu ib oe v3 si wa nm wf qf wg rk kz yd hl wx cy bp mx et
+\n uv yb dd k9 ph q4 me o6 nr xa mu ld r5 rb g2 or fe pa fj hp db lu qn nr j2 bk kt xl rn wf qp tl uc dq i7 tx fv ar sf xm mx x1 zw yh lz mr t6 l9 s8 ah u6 x7 yw ol tq eo gc pi
+\n ia ys jh wy sb i0 cp u3 ql kl kh
+\n po al qg qm d2 gs q2 ap qz q4 q7 kv ah o8 rs 2o ex zx qw z5 r4 r8 y1 is ts y8 qc dm ll we wr sg lb jx wu jv qe ee qt qy es ed ym hw to tx hr s2 oh dt dy af di do qo oo qa w5 uz wh kh x1 t1 hk no r7 rp na sl ej op ev tj eb sb ya pb u5 tx ds o1 hz v6 lw jp k4 wv do wn aj bc wn yw to w4 as ey yk is ig rp o8
+\n rs ty q1 wt wy xp e3 wa yd d7 ht ts sz fk su kw xg bw cw qw oo fu od ix sd zu jn qd ci fi xh mo cp ev th ua e0 em kc lm cu u3 n8 xr yq ti yj fm yx tw
+\n dx z2 ga kb yo sf sk fd gf i2 vs qw vj vw qe eu mz tu sp xs qs es t0 eb ak uh hl n1 v9 wv kd o8 rp
+\n ds qn qj qz cb kz bo wi o6 z9 fq wq ml cv cb lf eq r6 r8 ic y4 am sz sx po jp g8 ze we wu en ew qw 3q lz kw tt ty ti e1 fx ut tc uu s6 ow gm sh qp vn l4 uj op xn wh qk wz rc wh uv um ar e7 uf az uh py h5 lq vt nz lu li lm dd rl er rt yu o4 eu yz if
+\n iz ld me z1 y2 dj ar qb b4 l1 mz ij ry to ad xs sd wf eo hj wl ex ie u5 pr zj gt oi wc kg my ex zt ks yg eu aj
+\n gr iy ft pq um qj dz wt gj cn ru kx q8 ws ue rk eb ee fo jb jf la ji ke qq qi qw rq yb qp il eb y0 iv ff zp l3 xm fi x4 r5 xd r0 ol wc t8 ae iw ox fk of pb qj ku xc ct wc ie xn zi wm rz w2 tb u0 su pi
+\n a3 ss je un zf vq zg wo mx d7 pm gd vn eu yk tq ik fu ai qt qf j0 to yy at ii qk wz lw n7 ly
+\n ub dz rv qt rm wg ea pc j9 qa mr h8
+\n a4 wp td ur px qq ki yx go wc tm an
+\n po gn rf a5 uw qn q1 nn we is z8 wj ca t5 ij eb tz ef pr ix g3 ek ta a0 y6 sq pa wq cx kq qw we rt c0 mz pv py wi cw mj qt qu 0e oa tj ux pf to hr ao tx yr ts fd fv s5 ui qu j7 gw ug ss cy ks qf xm fk wg vp kt mv qj mn lk vh yt ol rf th os e8 tj ua rk on pq dg kv km wq kp ad bp os bw pb qh wp zs q6 u0 gj yu o5 if a1
+\n a6 he pu vd cd q4 jz z6 qc jc bz eh wi b1 ed ym eg fo us ib td y8 gj zm pk lc qr wu mh qw sw fi ue j0 xm kz y2 ev aj df h3 qk qc tx rr rl rc ut ad so ro tw
+\n a2 pa we kk eg q7 lh zj wp 4z gi yd yg rc io ix r9 jb xe rr iz jd ij tz p9 qi l4 pa g9 s4 vi tj pb hd f4 qh qk lq qs hn ro
+\n gn k9 qz aw wu ki yf e2 pk v8 xk wg 5y t3 sl u4 ya gd hn ql zr oj ig
+\n fr qn qh ph k0 q2 nq wi zz rh rx ee ef uo d7 ix el fh qv dm vg px wi m8 qq gh ud qy ec fu yw uw sa tb lg us sk wf 5h qf sh vp wk qk zw qz wg aq of ys ak al f5 re f8 pr ku qx wc u1 lr qs wb f4 cw k2 ka hk mf yr w3 ro ir
+\n gr qh ql wh kv e4 r4 oy lu qw pb et uj tb tn xs kr dt td t8 e8 uc xn eq yi af
+\n q5 dt ed y1 am qv ut gx m7 yt rr yq yr dy sh mt wd wm th bv ym
+\n tr qs ca lp uv q3 wu o5 c1 rx om ee er ta ou i1 jb rt ry os ti fc ss px jn gy jz vp ea tg ay rq u6 al de qc zt wn ez rz eq aa rm ox
+\n uv ds h9 fy rf jq he qh h8 d4 wr wf du ck wi km yp ut rv io g1 rb av y4 tw a0 hy sz qx gh ha q9 qw ze 1a bz bx bv qw po ee wa qi xk ri i6 ic he tm hl sj jb qp wp jk qr kf sm l8 be x3 qu ql t1 x6 yy fg rj ua ug qg kv k1 wl v7 xx bj ae wc n3 zy tl tz zk wq re n0 yw oh yr oz eu fm do ux uc
+\n hb pa h0 pg q3 mw q6 mg ls lh am sc gz al j2 wt t9 lm qe j2 rm re rr ry fl yw ux i0 2e eo bt vh ra ys sm pb on tx re ff wn wv tu rt ox ul ge
+\n al fw zb p6 hi qy ay ou rg sx ag rz uy
+\n bx wi kv t5 3w e7 sh ht ff nu la xo qi s3 uy jb 1o vm vi l6 be x3 ny pk aw u1 rk fx km qc be rw yn ey eo ro
+\n dn q3 jf w0 e0 lh rv zv js j2 xg ld hr qe mk s3 dr kw kc dh h2 ql cg zv n3 ym yt aa as
+\n ft ty qh pi d3 qz ip wu wi q8 wj a1 mg mj ut wt r3 om ua y3 ou fd dc zw lp xe cn dh ng qe kp qt mz xy ef ay od tz p8 i0 hu hz qo kw jw qf kf 3y v3 qj w0 ib ew t4 fg oc e9 ua ov hs u6 f9 h6 vj qv li wq iu yv xv rc w6 rm r2
+\n d2 d4 av jc si rs ut 5q pa mm s4 e5 tc km
+\n qa uk uw qh vd d3 q4 xo wo c3 wl wa w7 w9 mc r4 y2 fp r0 tw fr g8 ae qn lp sd bk en vr gd hu j1 xv xg rw ep wh ed fu ul eb fl fz i7 ht jx ns v3 ll zs j0 op xf qf l6 l7 sn wk zw wg ej ti wb wz t6 oz rh rj uf je av dj h6 ql wl oi im v8 zr qv wn ku wb bj ef o4 yl r1 ei so if uc
+\n k7 qg q5 kx oz mu wl ws rh b2 rk yh qn qe 6o 1y mj ei pf ye e1 dw hj hx do qo gc rh v2 zw x5 t1 t3 yu th e9 em au qh f0 qk km ql kp wc 5p vx bg ea ev wn wm w2 rx o3 yk ru
+\n a2 gr qa az dd gm d1 k9 hr we bz lg mg ny wp xd mp yo pj uy xs ua pt g4 tw ez jv q9 qn wr nd md nf qq ng pi bb j2 eu yl ij ty sb os eh hw i6 hg m9 nj wa qs w5 qf nz zh hw wh be zo fs rz me yk y3 ub t7 t8 u2 u3 en ha fl yd hf qh qk wc 1x ze w1 oj ee w3 ey du uz id ah pi
+\n o0 hn qz q5 du 2b bn a4 ex rj y1 dj yz y5 ig tf th js qv 5e gc j3 ls d8 yw bc cz tx mb wf ij ty ai as hi lh l3 qd n1 7o wf wh qh wg ot ra l6 el e5 s8 dg vm 3f 7o wn yw gj tb et
+\n fu a5 cp ch hx hc rd eh tg g4 li sw sf il eg eb zj 2o cg ew uc
+\n qs uc rp ml eb yd if pa c7 oq vk wu ot yq rl uz tv gb zu vm w0 sc tf ud qk wm ko yy er tw
+\n ak tt ub pa pq il jq q1 hr k0 uv ql q3 kk zs q5 z8 is lh q8 w7 qw es ii av yz dl ht td g6 vs jm zr px bj no g2 g7 la c7 xt mz yx tt ym os to s1 ur ta s3 gv pl qy pc qi sh jv j8 gr l3 bi oa wd fy v1 s1 zp hg 5e t1 rp wj ms wl t5 el wm at fl e0 sq h3 dl qz ka ox tj qb wb wq re xr rx em ee yu ri do uc a1 rp
+\n uv rs un qs um il ul q2 jy kl wo a1 rj tj pk ys r5 yn uo oe y4 ou y5 ar zb g0 qn gx zt lb 2v rm qy nw yz lm eg og i7 ht ss qy qi ge wf bw lv lb wh cs wk hf 7g yb zw hk ns ol rh th yo f4 rq dt uj qo fd wx nk rv ka fl mu rr q3 w2 oj ee rt w6 ru ul
+\n qs hq qf qn d1 k0 q2 kk o5 na si bv mx e9 tz d8 tq dl ez qv jf zw ww wu xo b8 w3 i2 te p3 ry iz s1 ut as s5 hk wo wa l6 wh bq xk wj wd ra gu yi u1 t0 ak ai tc ax ip uj nf zb wv bd wo fz qm qj pe md ew rv gj ol yk tn iq yl is si ie r2
+\n d3 ni wr ws li mj ds sh sl qx vs rp ft ik e1 sd af ho xn wg zh 6b rp eb f3 u5 uf df py k1 wz vk vx k2 dg wm er rv rb
+\n gv iz qz wo o7 k0 oq ti r9 us ib ps g0 jm jb tq ue iv pj sa cr kh t1 ot wc at e9 ys o2 ab qj ww za rn yi
+\n ty qn qh nt ql la q3 kl wt q5 mp mg o9 ls lh g2 id ez sw hu qb cx nu pl kw wt vz v2 1t wi mh qr eu rm qs xb ei ij uj yb sn ai iv pj oj de hy gv ka lz pa nx wg wj kj cw zp wk eq wj t5 ns rf e6 rh ev t9 eb ir e0 sm gs gd ds f5 dd de fc f8 x0 lm qz wz xc wc kx cu wv ks lv kv wn pv ei wm ju ww yv zy yt e2 rb w6 oc o6 tq ig
+\n q2 o2 gj q6 zh mg li mo vo ch tl ax ip ho wt ln ro wo qr tr tt os fo e1 de hg gb sk w5 fp kg mm l6 yi u3 fl hs u6 fx re rr dl wb jr el rw pm rt to rx w4 gx
+\n yv a3 qa uz k9 q2 o2 bp ny zl mj vo tk rx ui g5 pu qx nr xt ls lm rv qs yv ik fz tv wp nk gn qh qi yf ek e5 pb au tc ac kc br qz 4t qc mx ly wm kb ez w2 u8 ei o8
+\n rf vo q1 o2 wt q5 oc 5l a2 es oe sc cx wq qw ky em tx rm p2 qo ft ed uq fs i9 ok lz v3 au xj v4 wf l3 eg ej ex wl rv qv ak mi hm cj yr w4 si
+\n a2 tr qa fy qd qh oh kk lq dy bz oc jf wa k0 ip pm po qx wq cx we wr bj j5 yq qe gg rq rr oo ru od ix qt af or sj qo jn gr sk wd nc xo xl wk hh yf eh ek aq ex ar en tl ys rq pm ug ql qz wb wn yw og xw an if it
+\n ak kk wu ig df w0 rb y4 fr gg i2 qb qw yf j1 ij ue s2 qt ad i5 yp ai l9 wk km ql zn yb ee rb ir te
+\n hb q2 ld wt q7 qm km ws w7 vi iu yf rc g1 pr ot a9 pn dk ib qx hi qc jd jg hd q0 lo jj cn wb mz ec qp uj y9 tu yq au tp hg pl jx vx j7 wd 3y ca au rq kv qg dh k2 ok cf qa wv bp dw iu de k2 rt hj wm rc er o3 ey fn si if so
+\n hq q1 qj d3 ws as ld mu rj ut d8 ey ou ib ez gf y7 qx qn vz qm vd zw ww d8 xu v1 av b6 mg gs bb g8 rm qy yv rr ry oa tb dt jb qa j0 qs l5 nz qg wj t4 td t6 eb ua s0 pn ii ac x9 qj k4 wc v9 s7 cj zy wo 10 hn yq vz u0 fm uz ux
+\n ra pp qd d2 vg wj qn rh we ht st jm bv wu wi qy y9 de gw wa sb uz r1 qu pz ot td rg go e8 sn iy zr wc s0 ww ea pw ac w1 h4 w6 rp
+\n fr uv pa jt qk q4 ls wu wi mt xa vu tk ed rb pe fp am sr hp qv m1 gc en yw qq qt ud ey eo p3 tj tu en ix ux ye tp ic s3 ad hz qi uf qa kr wh vd lk yn 5t u1 t0 od rr x9 f0 zj kn nk wp zs se rv ei pi
+\n yb dc qs py qk nb jj ql q5 m3 mg zz e5 i1 zr lc zx xu vq hr xp by ei yx gl qi qp ij eg ai ph ap tn dy zp qp bp kf j5 ib vd eg el yy td yo ie ag em fc kx zx h8 wv sv q1 te wv vm yq ol if a1
+\n ak rd rf qg lo nq xy z6 q6 mw c1 cu z8 q7 vw wp a2 zz w8 yo uu ee yn r8 av yz y6 pp uq dv i3 db jv q9 2l xg rp ib lj sq tx tc mj mk qr rw te p3 ik eg pj e3 i9 im tb tn fm na j7 lj wa ct rg v4 he x3 kl bi r7 wj y1 l6 wk el 9f t6 gu tj od e9 tz re uh o2 zk ki cf lw jp sc s6 qs qb yn yw ms md w3 rv as yi ox du yz ir yc dp hc
+\n yv gy ik k5 db gm ux qj gc w8 ea g6 dx po jb 5t pv wi rz qp jv v1 ea en al ii dt qj py w4 if ux
+\n iy pd yg qq p4 in qa y1 yy ta fb zk s6 lu
+\n ql ws rv yj jk ke lm ff lb fx s4 av uv wl n1 rv dp
+\n qh d3 rs ih rc aq we 7y ud t3 h2 zt cu oc
+\n fr hn k6 je q3 k4 tm zh lj aj li a4 t7 w7 kx ut pl rc ih th hp wq pl ls ma oe lf wu l1 ve lp qt qy fi ti he oh hi ow tm cu p7 nr va r3 tt wk wc s8 s0 hs al o2 hk x0 qj lm v5 wl qz 7u iw os lu ah wm hk e1 o4 rm fq ro so
+\n gr sa rd um pf ca ga ql qz wt ld z6 vw kv my xt cn wr eq tk rw fe qx qc qr qr rk qa qi ex p3 p5 dq ff pc sh cy oq v4 wg s7 yo ya od rq dt un bw zm da bg q5 ru ah
+\n iy qd k6 dm oj qz zd vr w0 r7 d8 et y1 eg yj gz qq p3 il i8 ge wp sx s1 wj t4 lm jo qp pw xc vm sr uz ig
+\n qf u8 iq rg rk g4 im ih oq fp a0 ib tc uj tn nc kz ll u1 un qv ck lv vv
+\n a4 df um jg z2 lq wr tn xu id wo ez rk rz ew d5 ti ix to r0 sk fa fe fr hp jk wr v1 ms wu jm rc qt tr ex uk vr to tz ut hl sg hb qd xn rj jc 6x mo wz rp ek y4 oi oo ae sc e9 od pn hg wz js qv ln ju rx yg as rn gk o7 so it
+\n dd qs qd py k9 lw wt db gk zf nw wh t2 nf zx w8 rj ue w0 tl ew r6 ui ef g1 or ej pn an tq ou ff qv gj jv q0 kq 2l uo oe ku wi w1 tx qe rv 1l ws eu lv p4 ru fo tz ph ib fv uu i3 qu oe pc gw wp sx zd kw w6 bo w9 cs cw my qz zf rg fh e8 ay yd fz rw fc ng qz cg wx qp oz qv ly ha cx al iu rt mp e1 ee w4 tn ul ru o6 ag aj
+\n qa kv e5 bm yj j4 m5 rj qe ri ht oi qf qe t6 e5 aw t8 wc dw un yb
+\n iy jg jj d4 ju ol wy bs wk wq t8 a9 y4 ta dx jp qc q9 su si ut q0 m8 qq 4a zy qq zq sw po qe qt la sn p6 ht oj hy hh qi gc bn w7 au ya kz na oa ox ov je fb or wl qx ze cg we fk tz tv cg uq w6
+\n dx sa qa qs db go lo z3 lf ox jc wj 93 tj tk yf ii ef fo ua sk g5 fs el oi fr sx fg se q9 xq qw j3 m6 4h qq l2 eu rq qu qi hs uh p6 ix fx qa i0 wf ke bq ne wh xl ms un ex sx yi ua pb s0 rq ak ao fc pr qj cw wq vy wv u2 3h wq re yw eb gl eu
+\n hv gv il jd go qh d3 we q4 q5 ej lk my gv a2 ds ex e7 rk yf fq pu ff qx ho js gx j1 qe kw gm ja ns wy ln d0 cz xt te xu tt sb em ix ic p0 i8 im ui di gt ws os r1 qy pz l6 e5 e6 op sb pv sn ii rr v6 ql le zy pv mt da yq ol w6 yz ag it
+\n ds gy dg jf qg uw qj uv q5 q8 q8 q0 qm e5 rk yd tz pv if qn ju cv xy ki qw mj ls yv ru of tz so yr oq qi m9 sc kw qf zh jx wh kg l0 t1 pz un fj os ha sn f3 e0 om ab cw ct nj zy wn fl ww vn fn tn ie ov
+\n co qh jr jj cb bi wt q6 ra qm zx ur r5 an fw tg jm re j5 vl em sl xz qe wa 45 nq ex yb ed ef ph e4 tb s6 qy lz cu gm pd gq mc ca 85 yf wf hu sb eb fk fv dt cq lq qz ww wv xr n0 eq ok er et iw r2 o7 fe it
+\n ra dx og wd wt o9 i4 it pk qo ic dt hj jl oq sf lz wd ca hi fg f5 ap x9 gq nd iy q6 ep
+\n o9 gb a5 z5 q6 wu w0 tl r9 dj if ts ig it zq ll qw qy ep ed ry p5 ut hh dr i0 hl qp qa zs wd ya ot xk s7 e9 om io dl ki k2 wv q1 5g er rb rm
+\n qs w0 om ed tk ta th gf ii av og 6o ee o8
+\n po ty rf qf he qc hz bv c5 mi rh ew tu ef sh ix r0 d0 pb tq fw ig g8 fh i2 uw hf qw qr sf cn wi fh qt hp nq yv fy tj od ux ut fb hu tn qt oi qs oa xm dm fa nm qx yy oo ec ev ox sb ya rk ys ud jy dz zv qc zb qb 17 tb lt yt u9 w4 rb st et ry yl o5 di ux
+\n gb gt az uk gm qh d3 bt qz wd ld o3 bl cm q7 ck k7 we b2 yd ua ew tl rq yg us tq js fk jb gz jn jg qq in qr rq qy 3h c0 qp p3 yn ef sb ym jl sf sh hv qd p4 fj od ix kz ni wj wz tg t0 tj e0 sm om kx ku cw nb zc aw cy qv bq kg wq pn zz wv cd wn yr se o6 pu eo gc
+\n gy rf qf k6 qh qj cd q3 kk cj fw b4 fr mj w7 bm wr ya z7 wt w0 r8 ip ti is pn am y5 qb hs jf qw uu wr np qt wi 1t bx qq qw aw er cv qy rw eo oa iz fp iv qi jm nk kr qf xm eo nr w0 qj bi t3 uv wk ek wn ex e5 rf rh ga it f5 pm hm f8 qj gy jp le wc qc lt s9 zu lb q1 ju w1 uw w3 oj tn tq ir r2 te
+\n ak gn pw a5 qh k9 qk nb 2l qz wr kv mt gt w7 zx ii oe ug ix sz qx qc ar sr zb su vg qe np yg qt yj sv uk pd uq pf of eh jb sk gy ws ke kd be og kh x6 me wz e6 yo iw ah rw pm rr qg pt lm ql qz wz qs ly wb wn q3 ry rx gj ia o5 ge
+\n gy qm d3 q3 ia c1 ta ex e5 e8 eg sy cl jf qe nj nh m9 qa w6 ek iw kv qg ab n4 w5 iq do
+\n uv gv qa un az jd qm eg iw nr q8 zj ny c5 vu rl rx yn et ia ua is ot pt hu im gj qv vv xe xu 1g wo vt qt st qy rw qi wh ft es uk hw to p8 fn f1 hp qu sk p2 l4 zf qf g0 fi l7 be ky iv mn xp nn dt 6b ro kw uv ra tp e6 sv s8 sn tl fz iy qh hz ve v8 h8 th wc wy qb xm s9 hs wq zs yq tu en zt w4 dp yc
+\n sa ak gi hw qm gp pp qz fm id lh 6w 9h xr w7 ui ow rb oe ia us fq g5 y5 ig oi y6 im gk ze qe gn 3o ye xz jh qe db qi p1 ep re op te y9 os pj e3 p0 zp qa ih l4 cu zg wg sn rh lf fz ic kh ni wh vh wx th ag u3 f6 uj jy k3 2a wc kv lu wo hb eq w1 rb xw yo ei o7 gx
+\n hw wd r0 g4 sz b7 pi sn tc in qt zs rg eu
+\n iy hv hb hq qf z3 q2 xy ia tm zf jw wq a3 rk w0 d6 eg et is id po tg gh ob jj wr np no wt ja wy xl l2 yr bt bb tc cx qr rn qt lc rw sy ex y0 ru od to tz og ye hr pj de tv e4 qt ad oo qa jq fj l7 fo wh nt pl ro wl vp yy tf va e6 fg th ar s7 os at s8 re df pe hj f0 qx qc x7 lu nc zo tx bf ww pq cg w1 rx id pu it fe
+\n qf fi ld 4o ge da t5 zz mo zx vi ui w9 pj rl rz r4 uy r5 sf av ot fq tw sc zv g9 qv i4 ut iy m1 wy xo b0 ud cb qy rq ug wg sn fo ix uc aa i9 ss sd di sh j8 qp xa ep w0 7h wl t7 iw tj ya hs e0 fz hd u6 hh dr dh uk qj qk wx ol xb qv wb s8 15 wy zm jr nx it eo fx ww yv zy to ee yu yi iq ey iw rm uz yz ob
+\n po ra ik qd qf je jr lp wr ji ne wu 2b nt wa e7 rx xo ia yl ta ig ff hp gk jf q9 vd si gc qe rk wu by yg rq sb os fu eh em ux ic ao pj hy im du qy cr 2e l5 qw v1 lf mv wk rx dy rp ra rg gi eb hs au ap bo oa sn zi f6 h1 zt ey yz do
+\n lj vu y2 if qr wr ta so kg 3k ol u1 f5
+\n yb cv mq lf zz ue ui dx qe tv qu ex tz hh tb dy ds wi rb
+\n hb gt qa h9 rf qf qg k9 q1 lo q3 ql z8 gl zg q8 1t wo vr rg ez ws mo w9 yo r3 yd e9 ea sf a0 im tg y8 ar qb ni wr qr wv m4 ix d9 nz yq fa if yr bt qr mz qi ea rr xx at ic pk qi za hv kq w7 co xj wd x4 zs wh y3 rf ec u2 e9 ah s0 uh io un h8 iq zb bs nr be zo fz vb wm pr yw md rc ur er ia yl ox ei ux eo tw o8
+\n qs rd rh yf px ow d8 tq ig ih cl qr yw qq in qy wc ek rh ya qg cd x9 qm lq to o3 ul a1
+\n a2 cp as gk kc 4u zj z2 t5 zz rb eh sh sx fg fh g9 hp vd gx oq cv pe wv b8 qe cc nw tr il tl e3 qu l4 mt wk wh aq td e9 gf lm qz bu jq my wp vb dg y0 ye yq w6 r1
+\n ij az qn ph qv bv bf mz iu is y8 ar fh q9 hd qq ji sf ld up qo p2 at sp in qi nk l5 e0 rw px 5o ew oc te
+\n qd jr pa q5 w9 hw hu y8 dn qn zw wr ma ei xl dq i7 i9 vb sx wf wh na wl um e7 s0 h2 nh nk fj yl wn iu u7 as ad ey so uc
+\n la up ic g5 ay ic x8 u2 ar eb wb yr aj
+\n po da rd un qg uw lq m2 4r wg q8 z9 t3 zc d9 ae st q0 li qw wt kr qu ry en sm qf kf kh ny yt gi rh u5 em tc kv h8 qx lr jq ef
+\n a2 dh qh q1 h8 qz z6 kx z8 bv 3q df pk tl d8 tq tf g8 zb qw 5p zm qe cv yb ec uz iv e2 gq wp uh kq ws lc wk x3 t8 rj fc io je dk lr lt wv wt bw be eo q4 ye yy rv ok yo yp ir ig
+\n rs ij ty ps ul wr bh kb rs z4 z8 er px uo up y1 rb fo jo gg dv ph q0 jn xw ww d8 rp yd yf tx b0 op yn of jl tm px fm jc zf qd pk wh rp uv tp t9 ir yf ug qg v5 ku qz fd k4 cu mw zn iu bg lq ly rv su
+\n ik dm cm or tw pu lp eh qd kk j0 em ng tw
+\n ra ds qk cg q7 k6 4p t6 yu lq go yd eq r8 fw am dm xy cm v1 cz eo qi ij yn eg hq tc sj qa i0 oa l6 p6 wj vd wf mr yt ex e6 yo t9 ev s8 en rr bq kg hb lm re bj ms w1 et du
+\n o9 gy bl wz t8 hq iu ix av y5 y8 jn j1 np xt t9 vw qq 43 xv 9w yi ft es hy op lg vs hg wd ef wx ou ox sw dr ze xr st fm ah
+\n a3 wr fb jc c2 w8 rx fe q9 hd xq qq wi te y9 e1 qt qi qs nl ca bh u2 md tv hx
+\n ra jh q3 aa t3 1o eq lh rv fo us pt dj pm pi qn zr bj xj cm ix a0 ra hi eu nw yc p3 ru ri ue e2 jl hi wo g9 xn qh wl wx go yp rr dj nd ch u3 fj bd jy vn w1 ia ox tm uz
+\n ma y5 bl qi g7 ri fl ap 7a yo ko rp
+\n um dh pg wq r5 sf ia ta an hs ne q9 wt sh rk yi ym of tb oq do hv wj ic oi sc pe sc wq wb wm tq
+\n hq qm gg q4 xu k4 k7 uo wt kb et fo ey aw g0 xi am in qy eo qi eb ay ue og nt kl wx s9 df qg f9 v6 rv sv pc tl my bj wb eq h4 o3 ri
+\n tt uq qh nb qz wt jx ya on om io ow ha qp e2 fd e4 hp hx p2 vm xg xn ra l8 iu yf jr qh k4 1l oa tk zp yw rz sy ul yx eo ep
+\n ij yb qs fi ul qk by ql jl wr bi q5 bl tm q7 xp k7 vy gi tj rl rx yf ym tu er r8 pe ip ej y4 fd jn gx zt vj xt xh rj kt cm ri nh zq pp eu rw to pg e1 ue dw e2 so tb gm qi jb nk gy pa v1 xj fl kh 3k kl ed wx wc ek yy ez wc iq yo u4 it tz ak hn f8 h2 hl uv wz h8 gi nk ch zt wt bw kn yq e1 tv zp ag it
+\n qs rf jq go qh a8 jj xt le wu qq yd d6 d0 ff qc su c7 lc wp ty y0 tu ti pf ta aa ug vb rf vi rx no un yu e5 7r up rj ag ha hs fc wz bo qv bp tv ki er
+\n yb qa rd lo ok q6 o6 ba r6 ow or yl am aq gd dc ho cx c0 wu g4 ib c5 ep re qo ed yw iv ta hy jc pn xs oq xn bo zg ps kr iz yp wh wj xl xo zq me na ek wl wx wc rg uo ir tk aj da rw hm io uj fb jt qj dk nh zx jp qx wc zr zy zu nc xe ww wv vn h2 q6 en ew ad yl af eo if r2
+\n yb o0 dn z1 q1 sw qk po xt wr ls z8 ox wu jd ro q8 lh mh wa rg ea c3 pz tu g2 is a9 pn tw po qc sa jj vh ax xh kt wy jv mg nh kp b9 qi od ht e4 uu ij qf sf nc wk ap wd qz rd yi s7 tk pb it f7 o2 f9 kv qh h4 ln wz gi qd zn xm sn 39 yn rz u7 yr yj is ie ag ir tw
+\n po dx uv pq jd dm d2 hr cs gs d3 q4 wr np ab di mh vt w7 w0 on tl ia ta aq dz y7 i1 qc pg q0 wq j2 c7 la cb il wr lv na ru 4g vz nf bc sl qq qp qt ud rq wg ex uw he tz ye p9 ui ou vu at ix w0 vf bi ed yh wh ra y6 wc u1 t8 fj ov iy tz kx h1 hj jy qj h6 ng jo wz cj ie mt rq te n9 mp ma q5 8w rb o7 sp ob
+\n po ra dg ca qj q2 is kn rd ws lq tu ym yl y5 tg pp qw we cn a7 1t jd m7 wo yr sz qa hp ei qy ec p4 hw p7 to au iv ht qt qy qo l4 lz xm wd yf wg ez s7 en f3 tx yf rr f8 cw ji qv yz ry ew w2 oj w4 w5 st rn oz ri o6 dp aj
+\n gm jg ju q5 np lf q7 xo 1y qn k0 jo pp qx th q9 4f or ro pu bv eu nw uq ao dy jb j9 gr rd nz wj wj r9 co ta rk od dd gg hm df pr km ng oj qc sb qd wb tz cq ex wb vb ty eq tv iw
+\n fu uq co qk jl cg ld lg wo vr gc bd rj r3 yd rz iu ew d6 io to sh y7 jp db dn qn qm si xg qr ls jo lr wy rk wn m7 qu bb es op qp ru en ta e3 in hy dy hl vc gc gt jw ke 2t wh rk lj hg oy e6 yo ev em fz rw pq re dg qk ku oi qz k4 qv li rq n8 ec 4d yb wb e1 iw id o6 ir do ux pi ep
+\n a2 wu jd ef dc mn 5e qp pl xd ag ay
+\n yv o9 al a5 uq qg jw pi z2 jt cd q5 3m zl ez vu rg jl rz yf ix sj fp d0 tq ff ha hs zw om ni m0 xg c8 a7 ki qw cc ei xg j3 tt tu il p6 ix tp tx ib sp hg p0 fc pj su qu jv sj lk qp ws qs gm 1x mx lv wj qu l1 dt wh wv un aq fg rg e6 uo ar ie up it sw tx f6 o2 h3 qc qa ho vj u3 kd zy n6 my ww vc lr em w2 se rt o4 yc a1 te
+\n qs dv pa ty iz uw qh jj z3 tn jc eg e4 qq w7 ut r3 uu kb up g1 fo iv if fd gd sc qm qe xg ia wb he ky hu tv rw qu rr es p3 ue s2 as i0 dt qt hz jm j0 gy ci fi hw nv ea kk vf rx 68 ti rp wl oi vp at om io uk pt qh qj dl cf lr cx wq ku ki w2 yh af ul sp yc it
+\n ub yb dc ty gm dm go nv we ql by la o1 ju o3 jx fm aj wa rg e4 vi a6 r4 xo tz oe ip pv dk tq a0 tf fg tg i4 pz sd ry ky mg hy g7 eu qy yi rw qp eg yw hw sm uc i7 dw fx s3 sf zo m9 xs vn rf ci nz kr qt 9y pj lk ee pz ef rk e0 fx uf az fc qg jr oy lq cg qp um ad wc zn bw n6 my xr mp tu en o3 iq ir ro
+\n da a3 d1 jr dz ca ql nu q3 cf o6 nr mt lk yr rs lp w7 a5 pj ys ym r8 ey to fs dz im ih sw qx qv zn gl j1 xe lc zc vw 6a mh b9 qt rm re oo qp p4 tk ix p7 og tz yr sp aa hk ih lx qd mx 4n kk vf el oo td ae yo fj uf pr hl qj qk wr qc qv kf yz my wq hn zs dr ee u8 rv et ru ie ag tw
+\n gt ph z0 zl mu ui av zm om ui vh qr he qr es fl ws w6 nc ra rk kp ol wm yu it
+\n dd df jq jd ux ql el 3r ya uu iu ee eh g3 sj us ib pp qc jv hd bh zt uo d8 b3 xu bc rq te uh ex tt eb il qu pc ge sj qp ih xf 3r gr yh qx tu wl wn sz up ay it ab jt qz v7 wn li za 6o w3 fn yk eu ie gz ro
+\n yv o9 qf eg eh mh jh rh r3 rv ix y3 a0 sr qc qq qr wr qe bx ki m8 mk qi lm uk eb ai ur e2 xd nc ca eo mb ed uv rs up ya of hn lw wz qz et qh wm zr rc o3 r2
+\n ss qg qj ph qk q2 cs z4 bi qc cj q8 qn w7 rj ys ea r4 uy om rc ii fp sj ej yz el qx qv zn gk q9 hs m2 ii d7 nk c8 j4 qq dl gg pp ei qo yc od fo eh fp ta hy ok tv uu us dp qf sb zg ks sg n3 wh x2 nr cs wk kh wk wf ew 7h 7k oy t6 gi rg yp s9 ya e9 pb tc dt dh hk h2 pt qh h7 wz n1 qv kd pm cc xr kp yk tm ge
+\n gr qa ft tt gn qs pw pu ca ph ls cg cn zf bz q7 z0 c3 qn gv w7 rg ut e8 ii er ip sg y3 oy ek ht gd qx g0 qv db su jn qq lz uu jo ru an wi kn sq nh qr qy yx eo qi xz y9 ru pd au p7 dq he ut ok fd jz ui hc j9 l3 hb xd jq gy kh wf xs sl rs aq ez y4 ts um yi e0 gh dk py v5 qk ql ko jq wc nk v0 wb qv br iu wm 6p sr gk yp pu
+\n o0 pa pf z3 jt jy z7 cm ne w8 yz fd fg zn qq ll vg wr wb ia xx yj ty eh e1 so ts tc s4 i0 tn wo wp wa op va wk x3 vg qx rs sn au f3 tz sq hn rr o2 fv un k2 vj ey dj
+\n iy ra ij ty a4 un rf qg dm jr kj uv we cv gk wy z8 oc wp mo jl mz ev ch rv tu ax y2 g3 oy y6 im uq qv hp qb hd iy lp nk w2 bb ho ep tr os en sm p8 p9 hy ss ui gm qi oo vn ae qd w6 ps dn wd wg ro mr yt ol oz rg s7 u5 tl yd rr ax f0 cq ku qx ze n4 wn kt ca jy bg yc zs yw rz w1 eu rm r1
+\n hv fu ca q8 mt la r3 pl yh to sy yh tv x4 tg yp ov wn ze sp
+\n o9 qa az gm qd pw hq pd ga qj cd q3 jk pd du c2 zk xf t8 eq om es rc ua y3 pu ig qx se qv db st qn ii lx qe wt xk nx ku br qe qr qt rm eu xf xb rn qu qi ep qo rr ex xk p5 ym fi uq to ux ix ai hj gn zi oq qf kd wf xn kr w8 rl kk mq rp rf u1 s7 oa fh e7 yp e0 pr ql sx ck ag kg kt mp eb rl em ee w6 du rm yz if ep
+\n qs pi am 6a ut r4 ii sd ua ib y6 pa kt pb wm qq dz qt qp y0 he p8 ue tb qu or qo wh 40 8j t4 sl rf iu gh hh qc iq bf rl wm mf oh ew is dp
+\n da ps a7 jr z4 q3 bu xt ip jx q6 np z7 bp lg bz ye wo ig bb ww rf om uu ef r8 ey pt ta pn y7 gg th dn pg qb ri qq 42 qw zw b9 b0 xb qt qy yl wh xk ft at yw i7 sp de pz fn qy si m0 ik wf sg cq ql hk er eg lc ek na wc iw ir rk ua e0 ak iu sw ap uj av ab hl uv zc qa wc jw vj qv vk ay kg xw on rw 1b wn rl eb vm eq h4 yt oz eu uz
+\n a2 qa rd cp qh ub vg ws u0 4g bp da yo ev kz eq uu ee ef yk pr sj sk fe oi lt uy j2 gn io vk ns 27 ln wu ve yr l2 qu qi ry il ul tj eg ux i5 yr tx ph oj gq or zp wa qs gy iz v0 qt wj ic ca lh yb np ej sl td l8 yi iw s8 e8 ys yd sq al dt pt tg te yb eu tq r1 ir fe
+\n tr h9 go qj b1 wu q7 zh el tg mb ys ed ii er r8 xs pe r0 sw db ov m7 d3 re no nu zr pq ji wr lc or qw ee yy qr rn rm re qi te ea qo yb yn y0 uz uq iz tl yr i0 fm wp qs qd he wk kl ew as hl ez oo ox ie s0 f4 dl wq wl ww lr qc qv vk mt my kn ep em yt sy am so rp sp
+\n ak ft qg bg ji q6 bk xi tf mo ur pj yk ua qv wq gc qe ke ef eb tk uq i6 oh iv gb qs rx el yo rr pe wz wx ho xq tc mo yk du r1 tq oc hc te
+\n ra ub qj jh jt dx ql q6 da tf r3 ew iu sg tp yl el gc 6n tt ry pd ye ff lz kt yp fl yf dl rn
+\n o9 hb h9 qd dh qg q1 qj jy se q5 wt nr qv ge c5 el 6y uo rv ax pe et r0 fe y6 dx qx ha qq lo we zy v1 wy dl vr wa qr rm qi qp yn tz pg ph de p0 do qp wp wf bw xh ky xz wh hl to ek rd sv rj rq re h1 qg qh kl f1 zm 18 ez xe vm en 5j o3 rn fw fe it
+\n db c2 bb o0 w8 kl kc y4 qx zm pk cw id ve mh lp rq jb fl x1 qi wd lx f3 cy bq dd ye fn ig
diff --git a/contrib/tsearch2/dict.c b/contrib/tsearch2/dict.c

new file mode 100644 (file)

index 0000000..5c148c4
--- /dev/null
+++ b/contrib/tsearch2/dict.c
@@ -0,0 +1,275 @@
+/* 
+ * interface functions to dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snmap.h"
+
+/*********top interface**********/
+
+/* saved SPI plan for the pg_ts_dict lookup by oid; prepared on first use */
+static void *plan_getdict=NULL;
+
+/*
+ * Fill *dict with the runtime description of dictionary `id`.
+ *
+ * The row of pg_ts_dict with the given oid is fetched through SPI.  If its
+ * dict_init column holds a valid function oid, that function is called with
+ * the dict_initoption value and the pointer it returns is stored in
+ * dict->dictionary (otherwise dict->dictionary stays NULL).  The dict_lexize
+ * function is resolved into dict->lexize_info, passing TopMemoryContext to
+ * fmgr_info_cxt(), and dict->dict_id is set to `id`.
+ *
+ * An ERROR is reported through ts_error() when the plan cannot be prepared,
+ * the query fails, no row matches, or dict_lexize is null/invalid.
+ */
+void
+init_dict(Oid id, DictInfo *dict) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   /* start from a clean slate: dict_id==0, dictionary==NULL */
+   memset(dict,0,sizeof(DictInfo));
+   SPI_connect();
+   if ( !plan_getdict ) {
+       plan_getdict = SPI_saveplan( SPI_prepare( "select dict_init, dict_initoption, dict_lexize from pg_ts_dict where oid = $1" , 1, arg ) );
+       if ( !plan_getdict ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   /* " " is the nulls string: the single parameter is not null; fetch at most 1 row */
+   stat = SPI_execp(plan_getdict, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Datum opt;
+       Oid oid=InvalidOid;
+
+       /* column 1: optional init function */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( !(isnull || oid==InvalidOid) ) {
+           /* column 2: init option; passed on as-is even when it is null */
+           opt=SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull);
+           dict->dictionary=(void*)DatumGetPointer(OidFunctionCall1(oid, opt)); 
+       }
+
+       /* column 3: mandatory lexize function */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       if ( isnull || oid==InvalidOid ) 
+           ts_error(ERROR, "Null dict_lexize for dictionary %u", id);
+       fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext);
+       dict->dict_id=id;
+   } else 
+       ts_error(ERROR, "No dictionary with id %u", id);   /* Oid is unsigned: print with %u */
+   SPI_finish();
+}
+
+/*
+ * Process-local cache of dictionaries that have already been loaded,
+ * plus a name -> oid lookup cache.
+ */
+typedef struct {
+   /* entry returned by the most recent successful finddict() lookup, or NULL */
+   DictInfo    *last_dict;
+   /* number of entries of `list` currently in use */
+   int     len;
+   /* number of entries `list` has room for */
+   int     reallen;
+   /* realloc()ed array of loaded dictionaries, kept sorted by dict_id (see finddict) */
+   DictInfo    *list;
+   /* dictionary name -> oid cache filled by name2id_dict() */
+   SNMap       name2id_map;
+} DictList;
+
+/* the single cache instance; starts out empty */
+static DictList DList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole dictionary cache: the name -> oid map and the array of
+ * loaded dictionaries.  Afterwards DList is all zeroes again, so the next
+ * finddict()/name2id_dict() call reloads from the catalog.
+ */
+void
+reset_dict(void) {
+   DictInfo *entries = DList.list;
+
+   freeSNMap( &(DList.name2id_map) );
+   /* XXX need to free DList.list[*].dictionary */
+   if ( entries != NULL )
+       free(entries);
+   memset(&DList, 0, sizeof(DList));
+}
+
+
+/*
+ * qsort()/bsearch() comparator ordering DictInfo entries by dict_id.
+ *
+ * Oid is unsigned, so the ids are compared explicitly: returning their
+ * difference as an int yields the wrong sign once the two values are more
+ * than INT_MAX apart.
+ */
+static int
+comparedict(const void *a, const void *b) {
+   Oid ida = ((const DictInfo*)a)->dict_id;
+   Oid idb = ((const DictInfo*)b)->dict_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached DictInfo for dictionary `id`, loading it with
+ * init_dict() on first use.
+ *
+ * Lookup order: the most recently returned entry, then a binary search of
+ * the sorted cache array, and finally a fresh load that is appended to the
+ * array (growing it when full) and sorted into place.
+ *
+ * Adding an entry may realloc() and re-sort the array, so a pointer obtained
+ * from an earlier call may no longer refer to the same dictionary.
+ * init_dict() reports an ERROR if the dictionary cannot be loaded; in that
+ * case the cache is left without the new entry.
+ */
+DictInfo *
+finddict(Oid id) {
+   DictInfo *slot;
+
+   /* last used dict */
+   if ( DList.last_dict && DList.last_dict->dict_id==id )
+       return DList.last_dict;
+
+
+   /* already used dict */
+   if (  DList.len != 0 ) {
+       DictInfo key;
+       key.dict_id=id;
+       DList.last_dict = bsearch(&key, DList.list, DList.len, sizeof(DictInfo), comparedict);
+       if ( DList.last_dict != NULL )
+           return DList.last_dict;
+   }
+
+   /*
+    * last chance: load it.  Do not let last_dict point at the new slot
+    * until init_dict() has succeeded; otherwise an ERROR raised there would
+    * leave last_dict referring to a zeroed, unusable entry (dict_id 0).
+    */
+   DList.last_dict = NULL;
+   if ( DList.len==DList.reallen ) {
+       DictInfo *tmp;
+       int reallen = ( DList.reallen ) ? 2*DList.reallen : 16;
+       tmp=(DictInfo*)realloc(DList.list,sizeof(DictInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       DList.reallen=reallen;
+       DList.list=tmp;
+   }
+   slot=&(DList.list[DList.len]);
+   init_dict(id, slot);
+
+   /* the entry is valid now: publish it and restore the dict_id ordering */
+   DList.len++;
+   qsort(DList.list, DList.len, sizeof(DictInfo), comparedict);
+   return finddict(id); /* qsort changed order!! */
+}
+
+/* saved SPI plan for the dictionary name -> oid lookup; prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a dictionary name into the oid of its pg_ts_dict row.
+ *
+ * The name -> oid map in DList is consulted first; on a miss the catalog is
+ * queried through SPI and the answer is remembered in the map.  An ERROR is
+ * reported through ts_error() if the plan cannot be prepared, the query
+ * fails, or no dictionary carries that name.
+ */
+Oid
+name2id_dict(text *name) {
+   Oid     argtypes[1]={ TEXTOID };
+   bool    isnull;
+   Datum   values[1]={ PointerGetDatum(name) };
+   int     rc;
+   Oid     dictid=findSNMap_t( &(DList.name2id_map), name );
+
+   /* cache hit */
+   if ( dictid != InvalidOid )
+       return dictid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_dict where dict_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No dictionary with name '%s'", text2char(name));
+   else
+       dictid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(DList.name2id_map), name, dictid );
+   return dictid;
+}
+
+
+/******sql-level interface******/
+PG_FUNCTION_INFO_V1(lexize);
+Datum lexize(PG_FUNCTION_ARGS);
+
+/*
+ * lexize(dictionary oid, text): run the dictionary's lexize function on the
+ * text and return the resulting lexemes as a text array.
+ *
+ * The dictionary function is expected to hand back a NULL-terminated,
+ * palloc'ed array of palloc'ed C strings, or NULL.  For a NULL result the
+ * function returns SQL NULL when called with two arguments; when a third
+ * argument is supplied (as the C callers in this file do) it returns a
+ * plain NULL pointer without setting the null flag.
+ */
+Datum
+lexize(PG_FUNCTION_ARGS) {
+   text        *word=PG_GETARG_TEXT_P(1);
+   DictInfo    *dict=finddict( PG_GETARG_OID(0) );
+   char        **lexemes;
+   Datum       *elems;
+   ArrayType   *result;
+   int         nlex=0;
+   int         i;
+
+   lexemes = (char**)DatumGetPointer(
+       FunctionCall3(&(dict->lexize_info), 
+           PointerGetDatum(dict->dictionary),
+           PointerGetDatum(VARDATA(word)),
+           Int32GetDatum(VARSIZE(word)-VARHDRSZ)
+       )
+   );
+   PG_FREE_IF_COPY(word, 1);
+
+   if ( lexemes == NULL ) {
+       if (PG_NARGS() > 2)  
+           PG_RETURN_POINTER(NULL);
+       else
+           PG_RETURN_NULL();
+   }
+
+   /* count the lexemes up to the terminating NULL */
+   while ( lexemes[nlex] != NULL )
+       nlex++;
+
+   /* wrap every lexeme into a text datum */
+   elems = (Datum*)palloc(sizeof(Datum)*(nlex+1));
+   for (i=0; i<nlex; i++)
+       elems[i] = PointerGetDatum( char2text(lexemes[i]) );
+
+   result = construct_array(
+       elems,
+       nlex,
+       TEXTOID,
+       -1,
+       false,
+       'i'
+   );
+
+   /* the array holds its own copies; release the temporaries */
+   for (i=0; i<nlex; i++) {
+       pfree( DatumGetPointer(elems[i]) );
+       pfree( lexemes[i] );
+   }
+   pfree(lexemes);
+   pfree(elems);
+       
+   PG_RETURN_POINTER(result);   
+}
+
+PG_FUNCTION_INFO_V1(lexize_byname);
+Datum lexize_byname(PG_FUNCTION_ARGS);
+/*
+ * lexize(dictionary name, text): resolve the name with name2id_dict() and
+ * delegate to lexize().
+ *
+ * lexize() is called with a dummy third argument: with more than two
+ * arguments it signals "no result" by returning a NULL pointer instead of
+ * setting the null flag, which is translated into SQL NULL here.
+ */
+Datum 
+lexize_byname(PG_FUNCTION_ARGS) {
+   text *dictname=PG_GETARG_TEXT_P(0);
+   Datum res;
+
+   res=DirectFunctionCall3(
+       lexize,
+       ObjectIdGetDatum(name2id_dict(dictname)),
+       PG_GETARG_DATUM(1),
+       (Datum)0
+   );
+   PG_FREE_IF_COPY(dictname, 0);
+   if (res) 
+       PG_RETURN_DATUM(res); 
+   else 
+       PG_RETURN_NULL();
+}
+
+/* oid of the dictionary used by lexize_bycurrent(); 0 means "not set" */
+static Oid currect_dictionary_id=0;
+
+PG_FUNCTION_INFO_V1(set_curdict);
+Datum set_curdict(PG_FUNCTION_ARGS);
+/*
+ * set_curdict(dictionary oid): make the given dictionary the current one.
+ * The dictionary is looked up through finddict() first, so the current id
+ * is only changed if that call returns.
+ */
+Datum
+set_curdict(PG_FUNCTION_ARGS) {
+   Oid dictid=PG_GETARG_OID(0);
+
+   finddict(dictid);
+   currect_dictionary_id=dictid;
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curdict_byname);
+Datum set_curdict_byname(PG_FUNCTION_ARGS);
+/*
+ * set_curdict(dictionary name): resolve the name to an oid with
+ * name2id_dict() and hand it to set_curdict().
+ */
+Datum
+set_curdict_byname(PG_FUNCTION_ARGS) {
+   text *dictname=PG_GETARG_TEXT_P(0);
+   Oid dictid=name2id_dict(dictname);
+
+   DirectFunctionCall1(set_curdict, ObjectIdGetDatum(dictid));
+   PG_FREE_IF_COPY(dictname, 0);
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(lexize_bycurrent);
+Datum lexize_bycurrent(PG_FUNCTION_ARGS);
+/*
+ * lexize(text): lexize the text with the dictionary selected earlier by
+ * set_curdict().  Raises an ERROR if no dictionary has been selected.
+ *
+ * lexize() is called with a dummy third argument so that "no result" comes
+ * back as a NULL pointer, which is turned into SQL NULL here.
+ */
+Datum 
+lexize_bycurrent(PG_FUNCTION_ARGS) {
+   Datum res;
+   if ( currect_dictionary_id == 0 )
+       elog(ERROR, "No current dictionary. Execute select set_curdict().");
+
+   res = DirectFunctionCall3(
+       lexize,
+       ObjectIdGetDatum(currect_dictionary_id),
+       PG_GETARG_DATUM(0),
+       (Datum)0
+   );
+   if (res) 
+       PG_RETURN_DATUM(res);
+   else 
+       PG_RETURN_NULL();
+}
+
+
diff --git a/contrib/tsearch2/dict.h b/contrib/tsearch2/dict.h

new file mode 100644 (file)

index 0000000..bbbbfc4
--- /dev/null
+++ b/contrib/tsearch2/dict.h
@@ -0,0 +1,38 @@
+#ifndef __DICT_H__
+#define __DICT_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/*
+ * Stop-word list.
+ *   len    - number of entries (presumably the count of strings in
+ *            'stop'; confirm in readstoplist())
+ *   stop   - array of strings
+ *   wordop - function taking and returning a string; the dictionaries
+ *            in this module set it to lowerstr before loading a list
+ */
+typedef struct {
+   int len;
+   char    **stop;
+   char*   (*wordop)(char*);
+} StopList;
+
+/* operations on a StopList; the list file is named by a text value */
+void sortstoplist(StopList *s);
+void freestoplist(StopList *s); 
+void readstoplist(text *in, StopList *s);
+bool searchstoplist(StopList *s, char *key);
+char* lowerstr(char *str);
+
+/*
+ * One dictionary entry, identified by its Oid.
+ *   lexize_info - fmgr lookup data (presumably for the dictionary's
+ *                 lexize function; confirm in init_dict())
+ *   dictionary  - opaque pointer (presumably the object returned by the
+ *                 dictionary's init function; confirm in init_dict())
+ */
+typedef struct {
+   Oid dict_id;
+   FmgrInfo lexize_info;
+   void *dictionary;
+} DictInfo;
+
+/* dictionary lookup by id or by name */
+void init_dict(Oid id, DictInfo *dict);
+DictInfo* finddict(Oid id);
+Oid name2id_dict(text *name);
+void reset_dict(void);
+
+
+/* simple parser of cfg string */
+/* one key/value pair of a parsed configuration string */
+typedef struct {
+        char    *key;
+        char    *value;
+} Map;
+
+/*
+ * Parses 'in' into an array of Map entries stored in *m. Callers in this
+ * module walk the array until an entry with a NULL key and pfree() each
+ * key, each value and the array itself.
+ */
+void parse_cfgdict(text *in, Map **m);
+
+#endif
diff --git a/contrib/tsearch2/dict_ex.c b/contrib/tsearch2/dict_ex.c

new file mode 100644 (file)

index 0000000..b8c4f59
--- /dev/null
+++ b/contrib/tsearch2/dict_ex.c
@@ -0,0 +1,59 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+/* state of the example dictionary: nothing but a stop-word list */
+typedef struct {
+   StopList    stoplist;
+} DictExample;
+
+
+/* fmgr version-1 declarations of the functions defined below */
+PG_FUNCTION_INFO_V1(dex_init);
+Datum dex_init(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(dex_lexize);
+Datum dex_lexize(PG_FUNCTION_ARGS);
+
+/*
+ * Builds the example dictionary.
+ *
+ * Argument 0 (text, may be NULL) is handed to readstoplist() to load the
+ * stop words, which are then sorted. The dictionary object is allocated
+ * with malloc() and returned as a pointer.
+ */
+Datum 
+dex_init(PG_FUNCTION_ARGS) {
+   DictExample *dict;
+   StopList *stops;
+
+   dict = (DictExample*)malloc( sizeof(DictExample) );
+   if ( dict == NULL )
+       elog(ERROR, "No memory");
+
+   memset(dict, 0, sizeof(DictExample));
+   stops = &(dict->stoplist);
+   stops->wordop = lowerstr;
+
+   /* the stop-word argument is optional */
+   if ( !( PG_ARGISNULL(0) || PG_GETARG_POINTER(0)==NULL ) ) {
+       text *stopfile = PG_GETARG_TEXT_P(0);
+
+       readstoplist(stopfile, stops);
+       sortstoplist(stops);
+       PG_FREE_IF_COPY(stopfile, 0);
+   }
+
+   PG_RETURN_POINTER(dict);
+}
+
+/*
+ * Lexize function of the example dictionary.
+ *
+ * Arguments: 0 - DictExample pointer, 1 - pointer to the word,
+ * 2 - length of the word. Returns a palloc'ed, NULL-terminated array
+ * of at most one string: a copy of the word, or nothing when the word
+ * is empty or found in the stop-word list.
+ */
+Datum
+dex_lexize(PG_FUNCTION_ARGS) {
+   DictExample *dict = (DictExample*)PG_GETARG_POINTER(0);
+   char *src = (char*)PG_GETARG_POINTER(1);
+   char *word = pnstrdup(src, PG_GETARG_INT32(2));
+   char **lexemes = palloc(sizeof(char*)*2);
+   bool rejected = ( *word=='\0' || searchstoplist(&(dict->stoplist), word) );
+
+   if ( rejected )
+       pfree(word);
+
+   lexemes[0] = rejected ? NULL : word;
+   lexemes[1] = NULL;
+
+   PG_RETURN_POINTER(lexemes);
+}
diff --git a/contrib/tsearch2/dict_ispell.c b/contrib/tsearch2/dict_ispell.c

new file mode 100644 (file)

index 0000000..c5b33a4
--- /dev/null
+++ b/contrib/tsearch2/dict_ispell.c
@@ -0,0 +1,141 @@
+/* 
+ * ISpell interface
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "ispell/spell.h"
+
+/* state of an ISpell dictionary: stop-word list plus the ispell object */
+typedef struct {
+   StopList    stoplist;
+   IspellDict  obj;
+} DictISpell;
+
+/* fmgr version-1 declarations of the functions defined below */
+PG_FUNCTION_INFO_V1(spell_init);
+Datum spell_init(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(spell_lexize);
+Datum spell_lexize(PG_FUNCTION_ARGS);
+
+/* Releases both members of an ISpell dictionary, then the object itself. */
+static void
+freeDictISpell(DictISpell  *d) {
+   IspellDict *ispell = &d->obj;
+   StopList *stops = &d->stoplist;
+
+   FreeIspell(ispell);
+   freestoplist(stops);
+   free(d);
+}
+
+/*
+ * Builds an ISpell dictionary from a configuration string.
+ *
+ * Argument 0 (text, mandatory) is parsed with parse_cfgdict() into
+ * key/value options. Recognized keys, compared case-insensitively:
+ *   DictFile - dictionary file, loaded with ImportDictionary()
+ *   AffFile  - affix file, loaded with ImportAffixes()
+ *   StopFile - stop-word file, loaded with readstoplist()
+ * Each key may appear only once; any other key is an error. Both
+ * DictFile and AffFile are required. On every error raised here the
+ * dictionary object is released first with freeDictISpell().
+ */
+Datum 
+spell_init(PG_FUNCTION_ARGS) {
+   DictISpell  *d;
+   Map *cfg, *opt;
+   text *in;
+   bool have_aff=false, have_dict=false, have_stop=false;
+
+   if ( PG_ARGISNULL(0) || PG_GETARG_POINTER(0)==NULL )
+       elog(ERROR,"ISpell confguration error");
+ 
+   d = (DictISpell*)malloc( sizeof(DictISpell) );
+   if ( d == NULL )
+       elog(ERROR, "No memory");
+   memset(d, 0, sizeof(DictISpell));
+   d->stoplist.wordop = lowerstr;
+
+   in = PG_GETARG_TEXT_P(0);
+   parse_cfgdict(in, &cfg);
+   PG_FREE_IF_COPY(in, 0);
+
+   /* the option array ends with an entry whose key is NULL */
+   for ( opt=cfg; opt->key; opt++ ) {
+       if ( strcasecmp("DictFile", opt->key) == 0 ) {
+           if ( have_dict ) {
+               freeDictISpell(d);
+               elog(ERROR,"Dictionary already loaded");
+           }
+           if ( ImportDictionary(&(d->obj), opt->value) ) {
+               freeDictISpell(d);
+               elog(ERROR,"Can't load dictionary file (%s)", opt->value);
+           }
+           have_dict = true;
+       } else if ( strcasecmp("AffFile", opt->key) == 0 ) {
+           if ( have_aff ) {
+               freeDictISpell(d);
+               elog(ERROR,"Affixes already loaded");
+           }
+           if ( ImportAffixes(&(d->obj), opt->value) ) {
+               freeDictISpell(d);
+               elog(ERROR,"Can't load affix file (%s)", opt->value);
+           }
+           have_aff = true;
+       } else if ( strcasecmp("StopFile", opt->key) == 0 ) {
+           text *stopfile = char2text(opt->value);
+
+           if ( have_stop ) {
+               freeDictISpell(d);
+               elog(ERROR,"Stop words already loaded");
+           }
+           readstoplist(stopfile, &(d->stoplist));
+           sortstoplist(&(d->stoplist));
+           pfree(stopfile);
+           have_stop = true;
+       } else {
+           freeDictISpell(d);
+           elog(ERROR,"Unknown option: %s => %s", opt->key, opt->value);
+       }
+
+       /* this option has been consumed */
+       pfree(opt->key);
+       pfree(opt->value);
+   }
+   pfree(cfg);
+
+   /* both the affix file and the dictionary file are mandatory */
+   if ( !have_aff ) {
+       freeDictISpell(d);
+       elog(ERROR,"No affixes");
+   } else if ( !have_dict ) {
+       freeDictISpell(d);
+       elog(ERROR,"No dictionary");
+   } else {
+       SortDictionary(&(d->obj));
+       SortAffixes(&(d->obj));
+   }
+
+   PG_RETURN_POINTER(d);
+}
+
+/*
+ * Lexize function of the ISpell dictionary.
+ *
+ * Arguments: 0 - DictISpell pointer, 1 - pointer to the word,
+ * 2 - length of the word.
+ * Returns a NULL pointer when the length is zero or NormalizeWord()
+ * yields nothing; otherwise the NULL-terminated array produced by
+ * NormalizeWord() with every stop word removed.
+ */
+Datum
+spell_lexize(PG_FUNCTION_ARGS) {
+   DictISpell *d = (DictISpell*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt;
+   char    **res;
+   char    **ptr, **cptr;
+
+   if ( !PG_GETARG_INT32(2) )
+       PG_RETURN_POINTER(NULL);
+
+   /*
+    * NormalizeWord() supplies the result array itself, so nothing is
+    * allocated for 'res' beforehand.
+    */
+   txt = pnstrdup(in, PG_GETARG_INT32(2));
+   res=NormalizeWord(&(d->obj), txt);
+   pfree(txt);
+
+   if ( res==NULL ) 
+       PG_RETURN_POINTER(NULL);
+
+   /* compact the array in place: ptr reads, cptr writes the survivors */
+   ptr=cptr=res;
+   while(*ptr) {
+       if ( searchstoplist(&(d->stoplist),*ptr) ) {
+           pfree(*ptr);
+           *ptr=NULL;
+           ptr++;
+       } else {
+           *cptr=*ptr;
+           cptr++; ptr++;
+       }
+   }
+   *cptr=NULL;
+
+   PG_RETURN_POINTER(res);
+}
+
diff --git a/contrib/tsearch2/dict_snowball.c b/contrib/tsearch2/dict_snowball.c

new file mode 100644 (file)

index 0000000..0fbcc52
--- /dev/null
+++ b/contrib/tsearch2/dict_snowball.c
@@ -0,0 +1,108 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "snowball/english_stem.h"
+#include "snowball/russian_stem.h"
+
+/*
+ * State of a Snowball dictionary.
+ *   z        - Snowball environment created by <language>_create_env()
+ *   stoplist - stop words checked before stemming
+ *   stem     - stemming function of the chosen language, called with z
+ */
+typedef struct {
+   struct SN_env *z;
+   StopList    stoplist;
+   int (*stem)(struct SN_env * z);
+} DictSnowball;
+
+
+/* fmgr version-1 declarations of the functions defined below */
+PG_FUNCTION_INFO_V1(snb_en_init);
+Datum snb_en_init(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(snb_ru_init);
+Datum snb_ru_init(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(snb_lexize);
+Datum snb_lexize(PG_FUNCTION_ARGS);
+
+/*
+ * Builds a Snowball dictionary that uses the English stemmer.
+ *
+ * Argument 0 (text, may be NULL) is handed to readstoplist() to load the
+ * stop words, which are then sorted. The dictionary object is allocated
+ * with malloc() and returned as a pointer.
+ */
+Datum 
+snb_en_init(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+       
+   /* the stop-word argument is optional */
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = english_create_env();
+   if (!d->z) {
+       /* release everything allocated so far before raising the error */
+       freestoplist(&(d->stoplist));
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=english_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+/*
+ * Builds a Snowball dictionary that uses the Russian stemmer.
+ *
+ * Argument 0 (text, may be NULL) is handed to readstoplist() to load the
+ * stop words, which are then sorted. The dictionary object is allocated
+ * with malloc() and returned as a pointer.
+ */
+Datum 
+snb_ru_init(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+       
+   /* the stop-word argument is optional */
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = russian_create_env();
+   if (!d->z) {
+       /* release everything allocated so far before raising the error */
+       freestoplist(&(d->stoplist));
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=russian_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+/*
+ * Lexize function shared by the Snowball dictionaries.
+ *
+ * Arguments: 0 - DictSnowball pointer, 1 - pointer to the word,
+ * 2 - length of the word. Returns a palloc'ed, NULL-terminated array
+ * of at most one string: nothing for an empty word or a stop word,
+ * otherwise the word after the dictionary's stem function has run.
+ */
+Datum
+snb_lexize(PG_FUNCTION_ARGS) {
+   DictSnowball *d = (DictSnowball*)PG_GETARG_POINTER(0);
+   char *src = (char*)PG_GETARG_POINTER(1);
+   char *word = pnstrdup(src, PG_GETARG_INT32(2));
+   char **lexemes = palloc(sizeof(char*)*2);
+
+   lexemes[1] = NULL;
+
+   if ( *word=='\0' || searchstoplist(&(d->stoplist), word) ) {
+       pfree(word);
+       lexemes[0] = NULL;
+       PG_RETURN_POINTER(lexemes);
+   }
+
+   SN_set_current(d->z, strlen(word), word);
+   (d->stem)(d->z);
+
+   /* copy the stemmer's buffer back only when it holds something */
+   if ( d->z->p && d->z->l ) {
+       word = repalloc(word, d->z->l+1);
+       memcpy(word, d->z->p, d->z->l);
+       word[d->z->l] = '\0';
+   }
+   lexemes[0] = word;
+
+   PG_RETURN_POINTER(lexemes);
+}
+
diff --git a/contrib/tsearch2/dict_syn.c b/contrib/tsearch2/dict_syn.c

new file mode 100644 (file)

index 0000000..7f5b5e0
--- /dev/null
+++ b/contrib/tsearch2/dict_syn.c
@@ -0,0 +1,157 @@
+/* 
+ * Synonym dictionary
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+/* size of the buffer used to read one line of the synonym file */
+#define SYNBUFLEN  4096
+/* one synonym: word to look for (in) and its replacement (out) */
+typedef struct {
+   char    *in;
+   char    *out;
+} Syn;
+
+/* synonym dictionary: array of 'len' entries, sorted by 'in' in syn_init */
+typedef struct {
+   int len;
+   Syn *syn;   
+} DictSyn;
+
+/* fmgr version-1 declarations of the functions defined below */
+PG_FUNCTION_INFO_V1(syn_init);
+Datum syn_init(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(syn_lexize);
+Datum syn_lexize(PG_FUNCTION_ARGS);
+
+/*
+ * Finds the first whitespace-delimited word in 'in'.
+ *
+ * Returns a pointer to the first character of the word and stores in
+ * *end a pointer to the character just after it (a whitespace
+ * character or the terminating NUL). When 'in' is empty or holds only
+ * whitespace, returns NULL and leaves *end set to NULL. The string is
+ * not modified.
+ */
+static char *
+findwrd(char *in, char **end) {
+   char *start;
+
+   *end=NULL;
+   /* the cast keeps isspace() defined for characters with the high bit set */
+   while(*in && isspace((unsigned char)*in))
+       in++;
+
+   /* nothing but whitespace: there is no word */
+   if ( *in=='\0' )
+       return NULL;
+   start=in;
+
+   while(*in && !isspace((unsigned char)*in))
+       in++;
+
+   *end=in;
+   return start;
+}
+
+/* qsort()/bsearch() comparator: orders Syn entries by their 'in' string */
+static int
+compareSyn(const void *a, const void *b) {
+   Syn *left = (Syn*)a;
+   Syn *right = (Syn*)b;
+
+   return strcmp( left->in, right->in );
+}
+
+
+/*
+ * Builds a synonym dictionary from a file.
+ *
+ * Argument 0 (text, mandatory and non-empty) is the file name. Every
+ * line is expected to hold a word followed by its replacement,
+ * separated by whitespace; anything after the second word is ignored,
+ * and lines without two words are skipped. Both words are passed
+ * through lowerstr() and stored as malloc'ed copies. The entries are
+ * sorted by the first word so that syn_lexize() can use bsearch().
+ */
+Datum 
+syn_init(PG_FUNCTION_ARGS) {
+   text       *in;
+   DictSyn     *d;
+   int cur=0;
+   FILE    *fin;
+   char *filename;
+   char buf[SYNBUFLEN];
+   char *starti,*starto,*end=NULL;
+   int slen;
+
+   if ( PG_ARGISNULL(0) || PG_GETARG_POINTER(0)==NULL )
+       elog(ERROR,"NULL config");
+
+   in = PG_GETARG_TEXT_P(0);
+   if ( VARSIZE(in) - VARHDRSZ == 0 )
+       elog(ERROR,"VOID config");
+
+   filename=text2char(in);
+   PG_FREE_IF_COPY(in, 0);
+   if ( (fin=fopen(filename,"r")) == NULL )
+       elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+
+   d = (DictSyn*)malloc( sizeof(DictSyn) );
+   if ( !d ) {
+       fclose(fin);
+       elog(ERROR, "No memory");
+   }
+   memset(d,0,sizeof(DictSyn));
+
+   while( fgets(buf,SYNBUFLEN,fin) ) {
+       /*
+        * Strip the trailing newline only when there is one: the last
+        * line of the file, or a line longer than the buffer, comes
+        * without it and must not lose its final character.
+        */
+       slen = (int)strlen(buf);
+       if ( slen>0 && buf[slen-1]=='\n' )
+           buf[--slen] = '\0';
+       if ( *buf=='\0' ) continue;
+       /* grow the array geometrically: 16, 32, 64, ... entries */
+       if (cur==d->len) {
+           d->len = (d->len) ? 2*d->len : 16;
+           d->syn=(Syn*)realloc( d->syn, sizeof(Syn)*d->len );
+           if ( !d->syn ) {
+               fclose(fin);
+               elog(ERROR, "No memory");
+           }
+       }
+
+       starti=findwrd(buf,&end);
+       if ( !starti )
+           continue;
+       *end='\0';
+       /* the first word reaches the end of the line: no replacement */
+       if ( end >= buf+slen )
+           continue;
+
+       starto= findwrd(end+1, &end);
+       /* skip the line when only whitespace follows the first word */
+       if ( !starto || *starto=='\0' )
+           continue;
+       *end='\0';
+
+       d->syn[cur].in=strdup(lowerstr(starti));
+       d->syn[cur].out=strdup(lowerstr(starto));
+       if ( !(d->syn[cur].in && d->syn[cur].out) ) {
+           fclose(fin);
+           elog(ERROR, "No memory");
+       }
+
+       cur++; 
+   }
+   
+   fclose(fin);    
+   
+   /* from here on len is the number of entries actually filled in */
+   d->len=cur; 
+   if ( cur>1 )
+       qsort(d->syn, d->len, sizeof(Syn), compareSyn); 
+
+   pfree(filename);
+        PG_RETURN_POINTER(d);
+}
+
+/*
+ * Lexize function of the synonym dictionary.
+ *
+ * Arguments: 0 - DictSyn pointer, 1 - pointer to the word,
+ * 2 - length of the word. The word is copied, passed through
+ * lowerstr() and looked up with bsearch(). Returns a NULL pointer when
+ * the length is zero or the word is not found; otherwise a palloc'ed,
+ * NULL-terminated array holding a copy of the replacement word.
+ */
+Datum
+syn_lexize(PG_FUNCTION_ARGS) {
+   DictSyn *d = (DictSyn*)PG_GETARG_POINTER(0);
+   char *src = (char*)PG_GETARG_POINTER(1);
+   Syn probe, *hit;
+   char **lexemes;
+
+   if ( !PG_GETARG_INT32(2) )
+       PG_RETURN_POINTER(NULL);
+
+   probe.in = lowerstr(pnstrdup(src, PG_GETARG_INT32(2)));
+   probe.out = NULL;
+
+   hit = (Syn*)bsearch(&probe, d->syn, d->len, sizeof(Syn), compareSyn);
+   pfree(probe.in);
+
+   if ( hit ) {
+       lexemes = palloc(sizeof(char*)*2);
+       lexemes[0] = pstrdup(hit->out);
+       lexemes[1] = NULL;
+       PG_RETURN_POINTER(lexemes);
+   }
+
+   PG_RETURN_POINTER(NULL);
+}
+
diff --git a/contrib/tsearch2/docs/tsearch-V2-intro.html b/contrib/tsearch2/docs/tsearch-V2-intro.html

new file mode 100644 (file)

index 0000000..8375d4c
--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch-V2-intro.html
@@ -0,0 +1,975 @@
+
+
+
+
+  tsearch-v2-intro
+
+
+
+
+  
+    Tsearch2 - Introduction
+
+    
+    "http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/tsearch-V2-intro.html">
+    [Online version] of this document is available.
+
+    The tsearch2 module is available to add as an extension to
+    the PostgreSQL database to allow for Full Text Indexing. This
+    document is an introduction to installing, configuring, using
+    and maintaining the database with the tsearch2 module
+    activated.
+
+    Please, note, tsearch2 module is fully incompatible with old
+    tsearch, which is deprecated in 7.4 and will be obsoleted in
+    7.5.
+
+    USING TSEARCH2 AND POSTGRESQL FOR A WEB BASED SEARCH
+    ENGINE
+
+    This documentation is provided as a short guide on how to
+    quickly get up and running with tsearch2 and PostgreSQL, for
+    those who want to implement a full text indexed based search
+    engine. It is not meant to be a complete in-depth guide into
+    the full ins and outs of the contrib/tsearch2 module, and is
+    primarily aimed at beginners who want to speed up searching of
+    large text fields, or those migrating from other database
+    systems such as MS-SQL.
+
+    The README.tsearch2 file included in the contrib/tsearch2
+    directory contains a brief overview and history behind tsearch.
+    This can also be found online 
+    "http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">[right
+    here].
+
+    Further in depth documentation such as a full function
+    reference, and user guide can be found online at the 
+    "http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/">[tsearch
+    documentation home].
+
+    ACKNOWLEDGEMENTS
+
+    Robert John Shepherd originally wrote this documentation for
+    the previous version of tsearch module (v1) included with the
+    postgres release. I took his documentation and updated it to
+    comply with the tsearch2 modifications.
+
+    Robert's original acknowledgements:
+
+    "Thanks to Oleg Bartunov for taking the time to answer many
+    of my questions regarding this module, and also to Teodor
+    Sigaev for clearing up the process of making your own
+    dictionaries. Plus of course a big thanks to the pair of them
+    for writing this module in the first place!"
+
+    I would also like to extend my thanks to the developers, and
+    Oleg Bartunov for all of his direction and help with the new
+    features of tsearch2.
+
+    OVERVIEW
+
+    MS-SQL provides a full text indexing (FTI) system which
+    enables the fast searching of text based fields, very useful
+    for websites (and other applications) that require a results
+    set based on key words. PostgreSQL ships with a contributed
+    module called tsearch2, which implements a special type of
+    index that can also be used for full text indexing.
+    Furthermore, unlike MS' offering which requires regular incremental
+    rebuilds of the text indexes themselves, tsearch2 indexes are
+    always up-to-date and keeping them so induces very little
+    overhead.
+
+    Before we get into the details, it is recommended that you
+    have installed and tested PostgreSQL, are reasonably familiar
+    with databases, the SQL query language and also understand the
+    basics of connecting to PostgreSQL from the local shell. This
+    document isn't intended for the complete PostgreSQL newbie, but
+    anyone with a reasonable grasp of the basics should be able to
+    follow it.
+
+    INSTALLATION
+
+    Starting with PostgreSQL version 7.4 tsearch2 is now
+    included in the contrib directory with the PostgreSQL sources.
+    contrib/tsearch2 is where you will find everything needed to
+    install and use tsearch2. Please note that tsearch2 will also
+    work with PostgreSQL version 7.3.x, but it is not the module
+    included with the source distribution. You will have to
+    download the module separately and install it in the same
+    fashion.
+
+    I installed the tsearch2 module to a PostgreSQL 7.3 database
+    from the contrib directory without squashing the original (old)
+    tsearch module. What I did was move the module's tsearch src
+    directory into the contrib tree under the name tsearchV2.
+
+    Step one is to download the tsearch V2 module :
+
+    
+    "http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">[http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/]
+    (check Development History for latest stable version !)
+    
+        tar -zxvf tsearch-v2.tar.gz
+        mv tsearch2 PGSQL_SRC/contrib/
+        cd PGSQL_SRC/contrib/tsearch2
+
+
+    If you are installing from PostgreSQL version 7.4 or higher,
+    you can skip those steps and just change to the
+    contrib/tsearch2 directory in the source tree and continue from
+    there.
+
+    Then continue with the regular building and installation
+    process
+    
+        gmake
+        gmake install
+        gmake installcheck
+
+
+    That is pretty much all you have to do, unless of course you
+    get errors. However if you get those, you better go check with
+    the mailing lists over at 
+    "http://www.postgresql.org">http://www.postgresql.org or
+    
+    "http://openfts.sourceforge.net/">http://openfts.sourceforge.net/
+    since it's never failed for me.
+
+    The directory in contrib/ and the directory from the
+    archive are both called tsearch2. Tsearch2 is completely incompatible
+    with the previous version of tsearch. This means that both
+    versions can be installed into a single database, and migration
+    to the new version may be much easier.
+
+    NOTE: the previous version of tsearch found in the
+    contrib/tsearch directory is deprecated. Although it is still
+    available and included within PostgreSQL version 7.4, it will
+    be removed in version 7.5.
+
+    ADDING TSEARCH2 FUNCTIONALITY TO A DATABASE
+
+    We should create a database to use as an example for the
+    remainder of this file. We can call the database "ftstest". You
+    can create it from the command line like this:
+    
+        #createdb ftstest
+
+
+    If you thought installation was easy, this next bit is even
+    easier. Change to the PGSQL_SRC/contrib/tsearch2 directory and
+    type:
+    
+        psql ftstest < tsearch2.sql
+
+
+    The file "tsearch2.sql" holds all the wonderful little
+    goodies you need to do full text indexing. It defines numerous
+    functions and operators, and creates the needed tables in the
+    database. There will be 4 new tables created after running the
+    tsearch2.sql file : pg_ts_dict, pg_ts_parser, pg_ts_cfg,
+    pg_ts_cfgmap are added.
+
+    You can check out the tables if you like:
+    
+        #psql ftstest
+        ftstest=# \d
+                    List of relations
+         Schema |     Name     | Type  |  Owner
+        --------+--------------+-------+----------
+         public | pg_ts_cfg    | table | kopciuch
+         public | pg_ts_cfgmap | table | kopciuch
+         public | pg_ts_dict   | table | kopciuch
+         public | pg_ts_parser | table | kopciuch
+        (4 rows)
+
+
+    TYPES AND FUNCTIONS PROVIDED BY TSEARCH2
+
+    The first thing we can do is try out some of the types that
+    are provided for us. Lets look at the tsvector type provided
+    for us:
+    
+        SELECT 'Our first string used today'::tsvector;
+                        tsvector
+        ---------------------------------------
+         'Our' 'used' 'first' 'today' 'string'
+        (1 row)
+
+
+    The results are the words used within our string. Notice
+    they are not in any particular order. The tsvector type returns
+    a string of space separated words.
+    
+        SELECT 'Our first string used again today first string'::tsvector;
+                            tsvector
+        -----------------------------------------------
+         'Our' 'used' 'again' 'first' 'today' 'string'
+        (1 row)
+
+
+    Notice the results string has each unique word ('first' and
+    'string' only appear once in the tsvector value). Which of
+    course makes sense if you are searching the full text ... you
+    only need to know each unique word in the text.
+
+    Those examples were just casting a text field to that of
+    type tsvector. Lets check out one of the new functions created
+    by the tsearch2 module.
+
+    The function to_tsvector has 3 possible signatures:
+    
+        to_tsvector(oid, text);
+        to_tsvector(text, text);
+        to_tsvector(text);
+
+
+    We will use the second method using two text fields. The
+    overloaded methods provide us with a way to specify the way
+    the searchable text is broken up into words (Stemming process).
+    Right now we will specify the 'default' configuration. See the
+    section on TSEARCH2 CONFIGURATION to learn more about this.
+    
+        SELECT to_tsvector('default',
+                           'Our first string used today first string');
+                        to_tsvector
+        --------------------------------------------
+         'use':4 'first':2,6 'today':5 'string':3,7
+        (1 row)
+
+
+    The result returned from this function is of type tsvector.
+    The results came about by this reasoning: All of the words in
+    the text passed in are stemmed, or not used because they are
+    stop words defined in our configuration. Each lower case
+    morphed word is returned with all of the positions in the
+    text.
+
+    In this case the word "Our" is a stop word in the default
+    configuration. That means it will not be included in the
+    result. The word "first" is found at positions 2 and 6
+    (although "Our" is a stop word, its position is maintained).
+    The word(s) positioning is maintained exactly as in the
+    original string. The word "used" is morphed to the word "use"
+    based on the default configuration for word stemming, and is
+    found at position 4. The rest of the results follow the same
+    logic. Just a reminder again ... the order of the 'word'
+    position in the output is not in any kind of order. (ie 'use':4
+    appears first)
+
+    If you want to view the output of the tsvector fields
+    without their positions, you can do so with the function
+    "strip(tsvector)".
+    
+        SELECT strip(to_tsvector('default',
+                     'Our first string used today first string'));
+                    strip
+        --------------------------------
+         'use' 'first' 'today' 'string'
+
+
+    If you wish to know the number of unique words returned in
+    the tsvector you can do so by using the function
+    "length(tsvector)"
+    
+        SELECT length(to_tsvector('default',
+                      'Our first string used today first string'));
+         length
+        --------
+              4
+        (1 row)
+
+
+    Let's take a look at the function to_tsquery. It also has 3
+    signatures which follow the same rationale as the to_tsvector
+    function:
+    
+        to_tsquery(oid, text);
+        to_tsquery(text, text);
+        to_tsquery(text);
+
+
+    Lets try using the function with a single word :
+    
+        SELECT to_tsquery('default', 'word');
+         to_tsquery
+        -----------
+         'word'
+         (1 row)
+
+
+    I call the function the same way I would a to_tsvector
+    function, specifying the 'default' configuration for morphing,
+    and the result is the stemmed output 'word'.
+
+    Lets attempt to use the function with a string of multiple
+    words:
+    
+        SELECT to_tsquery('default', 'this is many words');
+        ERROR:  Syntax error
+
+
+    The function can not accept a space separated string. The
+    intention of the to_tsquery function is to return a type of
+    "tsquery" used for searching a tsvector field. What we need to
+    do is search for one to many words with some kind of logic (for
+    now simple boolean).
+    
+        SELECT to_tsquery('default', 'searching|sentence');
+              to_tsquery
+        ----------------------
+         'search' | 'sentenc'
+        (1 row)
+
+
+    Notice that the words are separated by the boolean logic
+    "OR", the text could contain boolean operators &,|,!,()
+    with their usual meaning.
+
+    You can not use words defined as being a stop word in your
+    configuration. The function will not fail ... you will just get
+    no result, and a NOTICE like this:
+    
+        SELECT to_tsquery('default', 'a|is&not|!the');
+        NOTICE:  Query contains only stopword(s)
+                 or doesn't contain lexem(s), ignored
+         to_tsquery
+        -----------
+        (1 row)
+
+
+    That is a beginning to using the types, and functions
+    defined in the tsearch2 module. There are numerous more
+    functions that I have not touched on. You can read through the
+    tsearch2.sql file built when compiling to get more familiar
+    with what is included.
+
+    INDEXING FIELDS IN A TABLE
+
+    The next stage is to add a full text index to an existing
+    table. In this example we already have a table defined as
+    follows:
+    
+        CREATE TABLE tblMessages
+        (
+                intIndex        int4,
+                strTopic        varchar(100),
+                strMessage      text
+        );
+
+
+    We are assuming there are several rows with some kind of
+    data in them. Any data will do, just do several inserts with
+    test strings for a topic, and a message. Here is some test data
+    I inserted. (yes I know it's completely useless stuff ;-) but
+    it will serve our purpose right now).
+    
+        INSERT INTO tblMessages
+               VALUES ('1', 'Testing Topic', 'Testing message data input');
+        INSERT INTO tblMessages
+               VALUES ('2', 'Movie', 'Breakfast at Tiffany\'s');
+        INSERT INTO tblMessages
+               VALUES ('3', 'Famous Author', 'Stephen King');
+        INSERT INTO tblMessages
+               VALUES ('4', 'Political Topic',
+                            'Nelson Mandella is released from prison');
+        INSERT INTO tblMessages
+               VALUES ('5', 'Nursery rhyme phrase',
+                            'Little jack horner sat in a corner');
+        INSERT INTO tblMessages
+               VALUES ('6', 'Gettysburg address quotation',
+                            'Four score and seven years ago'
+                            ' our fathers brought forth on this'
+                            ' continent a new nation, conceived in'
+                            ' liberty and dedicated to the proposition'
+                            ' that all men are created equal');
+        INSERT INTO tblMessages
+               VALUES ('7', 'Classic Rock Bands',
+                            'Led Zeppelin Grateful Dead and The Sex Pistols');
+        INSERT INTO tblMessages
+               VALUES ('8', 'My birth address',
+                            '18 Sommervile road, Regina, Saskatchewan');
+        INSERT INTO tblMessages
+               VALUES ('9', 'Joke', 'knock knock : who\'s there?'
+                                    ' I will not finish this joke');
+        INSERT INTO tblMessages
+               VALUES ('10', 'Computer information',
+                             'My computer is a pentium III 400 mHz'
+                             ' with 192 megabytes of RAM');
+
+
+    The next stage is to create a special text index which we
+    will use for FTI, so we can search our table of messages for
+    words or a phrase. We do this using the SQL command:
+    
+        ALTER TABLE tblMessages ADD idxFTI tsvector;
+
+
+    Note that unlike traditional indexes, this is actually a new
+    field in the same table, which is then used (through the magic
+    of the tsearch2 operators and functions) by a special index we
+    will create in a moment.
+
+    The general rule for the initial insertion of data will
+    follow four steps:
+    
+    1. update table
+    2. vacuum full analyze
+    3. create index
+    4. vacuum full analyze
+
+
+    The data can be updated into the table, the vacuum full
+    analyze will reclaim unused space. The index can be created on
+    the table after the data has been inserted. Having the index
+    created prior to the update will slow down the process. It can
+    be done in that manner, this way is just more efficient. After
+    the index has been created on the table, vacuum full analyze is
+    run again to update postgres's statistics (ie having the index
+    take effect).
+    
+        UPDATE tblMessages SET idxFTI=to_tsvector('default', strMessage);
+        VACUUM FULL ANALYZE;
+
+
+    Note that this only inserts the field strMessage as a
+    tsvector, so if you want to also add strTopic to the
+    information stored, you should instead do the following, which
+    effectively concatenates the two fields into one before being
+    inserted into the table:
+    
+        UPDATE tblMessages
+            SET idxFTI=to_tsvector('default',coalesce(strTopic,'') ||' '|| coalesce(strMessage,''));
+        VACUUM FULL ANALYZE;
+
+
+    Using the coalesce function makes sure this

+    concatenation also works with NULL fields.
+
+    We need to create the index on the column idxFTI. Keep in
+    mind that the database will update the index when some action
+    is taken. In this case we _need_ the index (The whole point of
+    Full Text INDEXING ;-)), so don't worry about any indexing
+    overhead. We will create an index based on the gist function.
+    GiST is an index structure for Generalized Search Tree.
+    
+        CREATE INDEX idxFTI_idx ON tblMessages USING gist(idxFTI);
+        VACUUM FULL ANALYZE;
+
+
+    After you have converted all of your data and indexed the
+    column, you can select some rows to see what actually happened.
+    I will not display output here but you can play around
+    yourselves and see what happened.
+
+    The last thing to do is set up a trigger so every time a row
+    in this table is changed, the text index is automatically
+    updated. This is easily done using:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, strMessage);
+
+
+    Or if you are indexing both strMessage and strTopic you
+    should instead do:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE
+                tsearch2(idxFTI, strTopic, strMessage);
+
+
+    Before you ask, the tsearch2 function accepts multiple
+    fields as arguments so there is no need to concatenate the two
+    into one like we did before.
+
+    If you want to do something specific with columns, you may
+    write your very own trigger function using plpgsql or other
+    procedural languages (but not SQL, unfortunately) and use it
+    instead of tsearch2 trigger.
+
+    You could however call other stored procedures from within
+    the tsearch2 function. Let's say we want to create a function to
+    remove certain characters (like the @ symbol) from all
+    text.
+    
+       CREATE FUNCTION dropatsymbol(text) 
+                     RETURNS text AS 'select replace($1, \'@\', \' \');' LANGUAGE SQL;
+
+
+    Now we can use this function within the tsearch2 function on
+    the trigger.
+    
+      DROP TRIGGER tsvectorupdate ON tblmessages;
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, dropatsymbol, strMessage);
+        INSERT INTO tblmessages VALUES (69, 'Attempt for dropatsymbol', '[email protected]');
+
+
+    If at this point you receive an error stating: ERROR: Can't
+    find tsearch config by locale
+
+    Do not worry. You have done nothing wrong. And tsearch2 is
+    not broken. All that has happened here is that the
+    configuration is setup to use a configuration based on the
+    locale of the server. All you have to do is change your default
+    configuration, or add a new one for your specific locale. See
+    the section on TSEARCH2 CONFIGURATION.
+    
+   SELECT * FROM tblmessages WHERE intindex = 69;
+
+         intindex |         strtopic         |  strmessage   |        idxfti
+        ----------+--------------------------+---------------+-----------------------   
+                69 | Attempt for dropatsymbol | [email protected] | 'test':1 'test.com':2
+        (1 row)
+Notice that the string content was passed through the stored
+procedure dropatsymbol. The '@' character was replaced with a
+single space ... and the output from the procedure was then stored
+in the tsvector column.
+
+    This could be useful for removing other characters from
+    indexed text, or any kind of preprocessing needed to be done on
+    the text prior to insertion into the index.
+
+    QUERYING A TABLE
+
+    There are some examples in the README.tsearch2 file for
+    querying a table. One major difference between tsearch and
+    tsearch2 is the operator ## is no longer available. Only the
+    operator @@ is defined, using the types tsvector on one side
+    and tsquery on the other side.
+
+    Let's search the indexed data for the word "Test". I indexed
+    based on the concatenation of the strTopic, and the
+    strMessage:
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'test'::tsquery;
+         intindex |   strtopic
+        ----------+---------------
+                1 | Testing Topic
+        (1 row)
+
+
+    The only result that matched was the row with a topic
+    "Testing Topic". Notice that the word I search for was all
+    lowercase. Let's see what happens when I query for uppercase
+    "Test".
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'Test'::tsquery;
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    We get zero rows returned. The reason is because when the
+    text was inserted, it was morphed to my default configuration
+    (because of the call to to_tsvector in the UPDATE statement).
+    If there was no morphing done, and the tsvector field(s)
+    contained the word 'Test', a match would have been found.
+
+    Most likely the best way to query the field is to use the
+    to_tsquery function on the right hand side of the @@ operator
+    like this:
+    
+        SELECT intindex, strtopic FROM tblmessages
+               WHERE idxfti @@ to_tsquery('default', 'Test | Zeppelin');
+         intindex |      strtopic
+        ----------+--------------------
+                1 | Testing Topic
+                7 | Classic Rock Bands
+        (2 rows)
+
+
+    That query searched for all instances of "Test" OR
+    "Zeppelin". It returned two rows: the "Testing Topic" row, and
+    the "Classic Rock Bands" row. The to_tsquery function performed
+    the correct morphology upon the parameters, and searched the
+    tsvector field appropriately.
+
+    The last example here relates to searching for a phrase, for
+    example "minority report". This poses a problem with regard to
+    tsearch2, as it doesn't index phrases, only words. But there is
+    a way around which doesn't appear to have a significant impact
+    on query time, and that is to use a query such as the
+    following:
+    
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*men are created equal.*';
+         intindex |           strtopic
+        ----------+------------------------------
+                6 | Gettysburg address quotation
+        (1 row)
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*something that does not exist.*';
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    Of course if you're indexing both strTopic and strMessage, and
+    want to search for this phrase on both, then you will have to
+    get out the brackets and extend this query a little more.
+
+    TSEARCH2 CONFIGURATION
+
+    Some words such as "and", "the", and "who" are automatically
+    not indexed, since they belong to a pre-existing dictionary of
+    "Stop Words" which tsearch2 does not perform indexing on. If
+    someone needs to search for "The Who" in your database, they
+    are going to have a tough time coming up with any results,
+    since both are ignored in the indexes. But there is a
+    solution.
+
+    Let's say we want to add a word into the stop word list for
+    english stemming. We could edit the file
+    '/usr/local/pgsql/share/english.stop' and add a word to the
+    list. I edited mine to exclude my name from indexing:
+    
+    - Edit /usr/local/pgsql/share/english.stop
+    - Add 'andy' to the list
+    - Save the file.
+
+
+    When you connect to the database, the dict_init procedure is
+    run during initialization. And in my configuration it will read
+    the stop words from the file I just edited. If you were
+    connected to the DB while editing the stop words, you will need
+    to end the current session and re-connect. When you re-connect
+    to the database, 'andy' is no longer indexed:
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+        (1 row)
+
+
+    Originally I would get the result :
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+         'andi':1
+        (1 row)
+
+
+    But since I added it as a stop word, it would be ignored on
+    the indexing. The stop word added was used in the dictionary
+    "en_stem". If I were to use a different configuration such as
+    'simple', the results would be different. There are no stop
+    words for the simple dictionary. It will just convert to lower
+    case, and index every unique word.
+    
+        SELECT to_tsvector('simple', 'Andy andy The the in out');
+                     to_tsvector
+        -------------------------------------
+         'in':5 'out':6 'the':3,4 'andy':1,2
+        (1 row)
+
+
+    All this talk about which configuration to use is leading us
+    into the actual configuration of tsearch2. In the examples in
+    this document the configuration has always been specified when
+    using the tsearch2 functions:
+    
+        SELECT to_tsvector('default', 'Testing the default config');
+        SELECT to_tsvector('simple', 'Example of simple Config');
+
+
+    The pg_ts_cfg table holds each configuration you can use
+    with the tsearch2 functions. As you can see the ts_name column
+    contains both the 'default' configuration, which is based on the 'C'
+    locale, and the 'simple' configuration, which is not based on
+    any locale.
+    
+        SELECT * from pg_ts_cfg;
+             ts_name     | prs_name |    locale
+        -----------------+----------+--------------
+         default         | default  | C
+         default_russian | default  | ru_RU.KOI8-R
+         simple          | default  |
+        (3 rows)
+
+
+    Each row in the pg_ts_cfg table contains the name of the
+    tsearch2 configuration, the name of the parser to use, and the
+    locale mapped to the configuration. There is only one parser to
+    choose from the table pg_ts_parser called 'default'. More
+    parsers could be written, but for our needs we will use the
+    default.
+
+    There are 3 configurations installed by tsearch2 initially.
+    If your locale is set to 'en_US' for example (like my laptop),
+    then as you can see there is currently no dictionary configured
+    to use with that locale. You can either set up a new
+    configuration or just use one that already exists. If I do not
+    specify which configuration to use in the to_tsvector function,
+    I receive the following error.
+    
+        SELECT to_tsvector('learning tsearch is like going to school');
+        ERROR:  Can't find tsearch config by locale
+
+
+    We will create a new configuration for use with the server
+    locale 'en_US'. The first step is to add a new configuration
+    into the pg_ts_cfg table. We will call the configuration
+    'default_english', with the default parser and use the locale
+    'en_US'.
+    
+        INSERT INTO pg_ts_cfg (ts_name, prs_name, locale)
+               VALUES ('default_english', 'default', 'en_US');
+
+
+    We have only declared that there is a configuration called
+    'default_english'. We need to set the configuration of how
+    'default_english' will work. The next step is creating a new
+    dictionary to use. The configuration of the dictionary is
+    completely different in tsearch2. In the prior versions to
+    make changes, you would have to re-compile your changes into
+    the tsearch.so. All of the configuration has now been moved
+    into the system tables created by executing the SQL code from
+    tsearch2.sql
+
+    Let's take a first look at the pg_ts_dict table:
+    
+        ftstest=# \d pg_ts_dict
+                Table "public.pg_ts_dict"
+         Column      |  Type   | Modifiers
+        -----------------+---------+-----------
+         dict_name       | text    | not null
+         dict_init       | oid     |
+         dict_initoption | text    |
+         dict_lemmatize  | oid     | not null
+         dict_comment    | text    |
+        Indexes: pg_ts_dict_idx unique btree (dict_name)
+
+
+    The dict_name column is the name of the dictionary, for
+    example 'simple', 'en_stem' or 'ru_stem'. The dict_init column
+    is an OID of a stored procedure to run for initialization of
+    that dictionary, for example 'snb_en_init' or 'snb_ru_init'.
+    The dict_initoption column is used for options passed to the init
+    function for the stored procedure. In the cases of 'en_stem' or
+    'ru_stem' it is a path to a stopword file for that dictionary,
+    for example '/usr/local/pgsql/share/english.stop'. This is
+    however dictated by the dictionary. ISpell dictionaries may
+    require different options. The dict_lemmatize column is another
+    OID of a stored procedure to the function used to lemmatize,
+    for example 'snb_lemmatize'. The dict_comment column is just a
+    comment.
+
+    Next we will configure the use of a new dictionary based on
+    ISpell. We will assume you have ISpell installed on your
+    machine. (in /usr/local/lib)
+
+    First let's register the dictionary(ies) to use from ISpell.
+    We will use the english dictionary from ISpell. We insert the
+    paths to the relevant ISpell dictionary (*.hash) and affixes
+    (*.aff) files. There seems to be some question as to which
+    ISpell files are to be used. I installed ISpell from the latest
+    sources on my computer. The installation installed the
+    dictionary files with an extension of *.hash. Some
+    installations install with an extension of *.dict. As far as I
+    know the two extensions are equivalent. So *.hash ==
+    *.dict.
+
+    We will also continue to use the english word stop file that
+    was installed for the en_stem dictionary. You could use a
+    different one if you like. The ISpell configuration is based on
+    the "ispell_template" dictionary installed by default with
+    tsearch2. We will use the OIDs to the stored procedures from
+    the row where the dict_name = 'ispell_template'.
+    
+        INSERT INTO pg_ts_dict
+               (SELECT 'en_ispell',
+                       dict_init,
+                       'DictFile="/usr/local/lib/english.hash",'
+                       'AffFile="/usr/local/lib/english.aff",'
+                       'StopFile="/usr/local/pgsql/share/english.stop"',
+                       dict_lemmatize
+                FROM pg_ts_dict
+                WHERE dict_name = 'ispell_template');
+
+
+    Next we need to set up the configuration for mapping the
+    dictionary use to the lexeme parsings. This will be done by
+    altering the pg_ts_cfgmap table. We will insert several rows,
+    specifying the use of the new dictionary we installed and
+    configured for use within tsearch2. There are several types of
+    lexemes for which we want to force the use of the ISpell
+    dictionary.
+    
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lhword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lpart_hword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lword', '{en_ispell,en_stem}');
+
+
+    We have just inserted 3 records to the configuration
+    mapping, specifying that the lexem types for "lhword,
+    lpart_hword and lword" are to be stemmed using the 'en_ispell'
+    dictionary we added into pg_ts_dict, when using the
+    configuration 'default_english' which we added to
+    pg_ts_cfg.
+
+    There are several other lexem types used that we do not need
+    to specify as using the ISpell dictionary. We can simply insert
+    values using the 'simple' stemming process dictionary.
+    
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'url', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'host', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'sfloat', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uri', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'int', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'float', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'email', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'word', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlpart_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'part_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlhword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'file', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uint', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'version', '{simple}');
+
+
+    Our addition of a configuration for 'default_english' is now
+    complete. We have successfully created a new tsearch2
+    configuration. At the same time we have also set the new
+    configuration to be our default for en_US locale.
+    
+        SELECT to_tsvector('default_english',
+                           'learning tsearch is like going to school');
+                           to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        SELECT to_tsvector('learning tsearch is like going to school');
+                            to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        (1 row)
+
+
+    In the case that you already have a configuration set for
+    the locale, and you are changing it to your new dictionary
+    configuration, you will have to set the old locale to NULL. If
+    we are using the 'C' locale then we would do this:
+    
+        UPDATE pg_ts_cfg SET locale=NULL WHERE locale = 'C';
+
+
+    That about wraps up the configuration of tsearch2. There is
+    much more you can do with the tables provided. This was just an
+    introduction to get things working rather quickly.
+
+    ADDING NEW DICTIONARIES TO TSEARCH2
+
+    To aid in the addition of new dictionaries to the tsearch2
+    module you can use another additional module in combination
+    with tsearch2. The gendict module is included into tsearch2
+    distribution and is available from gendict/ subdirectory.
+
+    I will not go into detail about installation and
+    instructions on how to use gendict to its fullest extent right
+    now. You can read the README.gendict ... it has all of the
+    instructions and information you will need.
+
+    BACKING UP AND RESTORING DATABASES THAT FEATURE
+    TSEARCH2
+
+    Believe it or not, this isn't as straightforward as it
+    should be, and you will have problems trying to backup and
+    restore any database which uses tsearch2 unless you take the
+    steps shown below. And before you ask, using pg_dumpall will
+    result in failure every time. These took a lot of trial and
+    error to get working, but the process as laid down below has
+    been used a dozen times now in live production environments so
+    it should work fine.
+
+    HOWEVER never rely on anyone else's instructions to backup
+    and restore a database system, always develop and understand
+    your own methodology, and test it numerous times before you
+    need to do it for real.
+
+    To Backup a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Backup any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        pg_dumpall -g > GLOBALobjects.sql
+
+
+    2) Backup the full database schema using pg_dump
+    
+        pg_dump -s DATABASE > DATABASEschema.sql
+
+
+    3) Backup the full database using pg_dump
+    
+        pg_dump -Fc DATABASE > DATABASEdata.tar
+
+
+    To Restore a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Create the blank database
+    
+        createdb DATABASE
+
+
+    2) Restore any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        psql DATABASE < GLOBALobjects.sql
+
+
+    3) Create the tsearch2 objects, functions and operators
+    
+        psql DATABASE < tsearch2.sql
+
+
+    4) Edit the backed up database schema and delete all SQL
+    commands which create tsearch2 related functions, operators and
+    data types, BUT NOT fields in table definitions that specify
+    tsvector types. If you're not sure what these are, they are the
+    ones listed in tsearch2.sql. Then restore the edited schema to
+    the database
+    
+        psql DATABASE < DATABASEschema.sql
+
+
+    5) Restore the data for the database
+    
+        pg_restore -N -a -d DATABASE DATABASEdata.tar
+
+
+    If you get any errors in step 4, it will most likely be
+    because you forgot to remove an object that was created in
+    tsearch2.sql. Any errors in step 5 will mean the database
+    schema was probably restored wrongly.
+  
+
+


diff --git a/contrib/tsearch2/docs/tsearch2-guide.html b/contrib/tsearch2/docs/tsearch2-guide.html

new file mode 100644 (file)

index 0000000..2529480


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-guide.html
@@ -0,0 +1,1057 @@
+
+
+
+
+tsearch2 guide
+
+
+The tsearch2 Guide
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Guide introduces the reader to the PostgreSQL tsearch2 module,
+version 2.
+More formal descriptions of the module's types and functions
+are provided in the tsearch2 Reference,
+which is a companion to this document.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+First we will examine the tsvector and tsquery types
+and how they are used to search documents;
+next, we will use them to build a simple search engine in SQL;
+and finally, we will study the internals of document conversion
+and how you might tune the internals to accommodate various searching needs.
+
+Once you have tsearch2 working with PostgreSQL,
+you should be able to run the examples here exactly as they are typed.
+
+
+Table of Contents
+
+Vectors and Queries

+A Simple Search Engine

+Ranking and Position Weights

+Casting Vectors and Queries

+Parsing and Lexing

+
+
+
+
+Vectors and Queries
+
+
+This section introduces

+the two data types upon which tsearch2 search engines are based,
+and illustrates their interaction using the simplest possible case.
+The complex examples we present later on
+are merely variations and elaborations of this basic mechanism.
+
+
+The tsearch2 module allows you to index documents by the words they contain,
+and then perform very efficient searches
+for documents that contain a given combination of words.
+Preparing your document index involves two steps:
+
+Making a list of the words each document contains.
+ You must reduce each document to a tsvector
+ which lists each word that appears in the document.
+ This process offers many options,
+ because there is no requirement
+ that you must copy words into the vector
+ exactly as they appear in the document.
+ For example,
+ many developers omit frequent and content-free stop words
+ like the to reduce the size of their index;
+ others reduce different forms of the same word
+ (forked, forking, forks)
+ to a common form (fork)
+ to make search results independent of tense and case.
+ Because words are very often stored in a modified form,
+ we use the special term lexemes
+ for the word forms we actually store in the vector.
+Creating an index of the documents by lexeme.
+ This is managed automatically by tsearch2
+ when you create a gist() index
+ on the tsvector column of a table,
+ which implements a form of the Berkeley
+ Generalized Search Tree.
+
+Once your documents are indexed,
+performing a search involves:
+
+Reducing the search terms to lexemes.
+ You must express each search you want to perform
+ as a tsquery specifying a boolean combination of lexemes.
+ Note that tsearch2 only finds exact matches
+ between the lexemes in your query and the ones in each vector —
+ even capitalization counts as a difference
+ (which is why all lexemes are usually kept lowercase).
+ So you must process search words the same way you processed document words;
+ if forking became fork in the document's tsvector,
+ then the search term forking must also become fork
+ or the search will not find the document.
+Retrieving the documents that match the query.
+ Running a SELECT ... WHERE
+ query @@ vector
+ on the table with the vector column
+ will return the documents that match your query.
+Presenting your results.
+ This final stage offers as many options
+ as turning documents into vectors.
+ You can order documents by how well they matched the search terms;
+ create a headline for each document
+ showing some of the phrases in which it uses the search terms;
+ and restrict the number of results retrieved.
+ You will of course want some way to identify each document,
+ so the user can ask for the full text of the ones he wants to read.
+
+And beyond deciding upon rules for turning documents into vectors
+and for presenting search results to users,
+you have to decide where to perform these operations —
+whether one database server
+will parse documents, perform searches, and prepare search results,
+or whether to spread the load of these operations across several machines.
+These are complicated design issues
+which we will explore later;
+in this section and the next,
+we will illustrate what can be accomplished
+using a single database server.
+
+The default tsearch2 configuration,
+which we will learn more about later,
+provides a good example of a process for reducing documents to vectors:
+
+
+=# SELECT set_curcfg('default')
+=# SELECT to_tsvector('The air smells of sea water.')
+             to_tsvector             
+-------------------------------------
+ 'air':2 'sea':5 'smell':3 'water':6
+(1 row)
+
+
+Note the complex relationship between this document and its vector.
+The vector lists only words from the document —
+spaces and punctuation have disappeared.
+Common words like the and of have been eliminated.
+The -s that makes smells a plural has been removed,
+leaving a lexeme that represents the word in its simplest form.
+And finally,
+though the vector remembers the positions in which each word appeared,
+it does not store the lexemes in that order.
+
+Keeping word positions in your vectors is optional, by the way.
+The positions are necessary for the tsearch2 ranking functions,
+which you can use to prioritize documents
+based on how often each document uses the search terms
+and whether they appear in close proximity.
+But if you do not perform ranking,
+or use your own process that ignores the word positions stored in the vector,
+then you can save space by stripping them from your vectors:
+
+
+=# SELECT strip(to_tsvector('The air smells of sea water.'))
+            strip            
+-----------------------------
+ 'air' 'sea' 'smell' 'water'
+(1 row)
+
+
+Now that we have a procedure for creating vectors,
+we can build an indexed table of vectors very simply:
+
+
+=# CREATE TABLE vectors ( vector tsvector )
+=# CREATE INDEX vector_index ON vectors USING gist(vector)
+=# INSERT INTO vectors VALUES (to_tsvector('The path forks here'))
+=# INSERT INTO vectors VALUES (to_tsvector('A crawl leads west'))
+=# INSERT INTO vectors VALUES (to_tsvector('The left fork leads northeast'))
+=# SELECT * FROM vectors
+                  vector                  
+------------------------------------------
+ 'fork':3 'path':2
+ 'lead':3 'west':4 'crawl':2
+ 'fork':3 'lead':4 'left':2 'northeast':5
+(3 rows)
+
+
+Now we can search this collection of document vectors
+using the @@ operator and a tsquery
+that specifies the combination of lexemes we are looking for.
+Note that while vectors simply list lexemes,
+queries always combine them with the operators
+‘&’ and,
+‘|’ or,
+and  ‘!’ not,
+plus parentheses for grouping.
+Some examples of the query syntax:
+
+
+ ‘find documents with the word forks in them’

+ 'forks'
+
+ ‘... with both forks and leads’

+ 'forks & leads'
+
+ ‘... with either forks or leads’

+ 'forks | leads'
+
+ ‘... with either forks or leads,
+  but without crawl’

+ '(forks|leads) & !crawl'
+
+The tsearch2 module
+provides a to_tsquery() function for creating queries
+that uses the same process as to_tsvector() uses
+to reduce words to lexemes.
+For instance,
+it will remove the -s from the plurals in the last example above:
+
+
+=# SELECT to_tsquery('(leads|forks) & !crawl')
+           to_tsquery           
+--------------------------------
+ ( 'lead' | 'fork' ) & !'crawl'
+(1 row)
+
+
+Again,
+this is critically important because the search operator @@
+only finds exact matches
+between the words in a query and the words in a vector;
+if the document vector lists the lexeme fork
+but the query looks for the plural form forks,
+the query would not match that document.
+Thanks to the symmetry between our process
+for producing vectors and queries, however,
+the above searches return correct results:
+
+
+=# SELECT * FROM vectors WHERE vector @@ to_tsquery('(leads|forks) & !crawl')
+                  vector                  
+------------------------------------------
+ 'fork':3 'path':2
+ 'fork':3 'lead':4 'left':2 'northeast':5
+(2 rows)
+
+
+You may want to try the other queries shown above,
+and perhaps invent some of your own.
+
+You should not include stop words in a query,
+since you cannot search for words you have discarded.
+If you throw out the word the when building vectors, for example,
+your index will obviously not know which documents included it.
+The to_tsquery() function will automatically detect this
+and give you an error to prevent this mistake:
+
+
+=# SELECT to_tsquery('the')
+NOTICE:  Query contains only stopword(s) or doesn't contain lexem(s), ignored
+ to_tsquery 
+------------
+ 
+(1 row)
+
+
+But if you ever build vectors and queries using your own routines,
+a possibility we will discuss later,
+then you will need to enforce this rule yourself.
+
+

+Now that you understand how vectors and queries work together,
+you are prepared to tackle many additional topics:
+how to distribute searching across many servers;
+how to customize the process
+by which tsearch2 turns documents and queries into lexemes,
+or use a process of your own;
+and how to sort and display search results to your users.
+But before discussing these detailed questions,
+we will build a simple search engine
+to see how easily its basic features work together.
+
+
+A Simple Search Engine
+
+

+In this section we build a simple search engine out of SQL functions
+that use the vector and query types described in the previous section.
+While this example is simpler
+than a search engine that has to interface with the outside world,
+it will illustrate the basic principles of building a search engine,
+and better prepare you for developing your own.
+
+Building a search engine involves only a few improvements
+upon the rudimentary vector searches described in the last section.
+
+Because the user wants to read documents, not vectors,
+ you must provide some way
+ for the full text of each document to be accessed —
+ either by storing the entire text of each document in the database,
+ or storing an identifier
+ like a URL, file name, or document routing number
+ that lets you fetch the document from other storage.
+You can make it easier for user interface code to refer to each document
+ by providing a unique identifier for each document,
+ perhaps with a SERIAL column.
+Search results should be ordered by relevance.
+ If you leave word positions in your vectors,
+ you can either have PostgreSQL ORDER your results
+ BY a ranking function,
+ or you can fetch the vectors yourself and perform your own sort.
+ If you choose to ignore word positions or strip them from your vectors,
+ you will have to determine relevance yourself,
+ using either the full text of the document
+ or other information about each document you may possess.
+For each document returned by a search,
+ you will usually want to display a summary called a headline
+ that shows short excerpts
+ illustrating how the document uses the query words.
+ Headlines are usually generated from the full text of the document,
+ not from position information in the tsvector,
+ since excerpts lacking stop words, punctuation, and suffixes
+ would not be comprehensible.
+ If you store the full text of each document in the database,
+ headlines can be generated very simply by a tsearch2 function.
+ If you store your documents elsewhere,
+ then you will either have to transmit each document to the database
+ every time you want to run the headline function on it,
+ or use your own headline code outside of the database.
+
+
+We can easily construct a simple search engine
+that accomplishes these goals.
+First we build a table that, for each document,
+stores a unique identifier, the full text of the document,
+and its tsvector:
+
+
+=# CREATE TABLE docs ( id SERIAL, doc TEXT, vector tsvector )
+=# CREATE INDEX docs_index ON docs USING gist(vector);
+
+
+Note that although searches will still work
+on tables where you have neglected
+to create a gist() index over your vectors,
+they will run much more slowly
+since they will have to compare the query
+against every document vector in the table.
+
+Because the table we have created
+stores each document in two different ways —
+both as text and as a vector —
+our INSERT statements must provide the document in both forms.
+While more advanced PostgreSQL programmers
+might accomplish this with a database trigger or rule,
+for this simple example we will use a small SQL function:
+
+
+=# CREATE FUNCTION insdoc(text) RETURNS void LANGUAGE sql AS

+  'INSERT INTO docs (doc, vector) VALUES ($1, to_tsvector($1));'
+
+
+Now, by calling insdoc() several times,
+we can populate our table with documents:
+
+
+=# SELECT insdoc('A low crawl over cobbles leads inward to the west.')
+=# SELECT insdoc('The canyon runs into a mass of boulders -- dead end.')
+=# SELECT insdoc('You are crawling over cobbles in a low passage.')
+=# SELECT insdoc('Cavernous passages lead east, north, and south.')
+=# SELECT insdoc('To the east a low wide crawl slants up.')
+=# SELECT insdoc('You are in the south side chamber.')
+=# SELECT insdoc('The passage here is blocked by a recent cave-in.')
+=# SELECT insdoc('You are in a splendid chamber thirty feet high.')
+
+
+Now we can build a search function.
+Its SELECT statement is based upon
+the same @@ operation illustrated in the previous section.
+But instead of returning matching vectors,
+we return for each document
+its SERIAL identifier, so the user can retrieve it later;
+a headline that illustrates its use of the search terms;
+and a ranking with which we also order the results.
+Our search operation can be coded as a single SELECT statement
+returning its own kind of table row,
+which we call a finddoc_t:
+
+
+=# CREATE TYPE finddoc_t AS (id INTEGER, headline TEXT, rank REAL)
+=# CREATE FUNCTION finddoc(text) RETURNS SETOF finddoc_t LANGUAGE sql AS '

+   SELECT id, headline(doc, q), rank(vector, q)
+     FROM docs, to_tsquery($1) AS q
+     WHERE vector @@ q ORDER BY rank(vector, q) DESC'
+
+
+This function is a rather satisfactory search engine.
+Here is one example search,
+after which the user fetches the top-ranking document itself;
+with similar commands you can try queries of your own:
+
+
+=# SELECT * FROM finddoc('passage|crawl')
+ id |                       headline                        | rank 
+----+-------------------------------------------------------+------
+  3 | <b>crawling</b> over cobbles in a low <b>passage</b>. | 0.19
+  1 | <b>crawl</b> over cobbles leads inward to the west.   |  0.1
+  4 | <b>passages</b> lead east, north, and south.          |  0.1
+  5 | <b>crawl</b> slants up.                               |  0.1
+  7 | <b>passage</b> here is blocked by a recent  cave-in.  |  0.1
+(5 rows)
+=# SELECT doc FROM docs WHERE id = 3
+                       doc                       
+-------------------------------------------------
+ You are crawling over cobbles in a low passage.
+(1 row)
+
+
+While by default the headline() function
+surrounds matching words with <b> and </b>
+in order to distinguish them from the surrounding text,
+you can provide options that change its behavior;
+consult the tsearch2 Reference for more details about
+Headline Functions.
+
+Though a search may match hundreds or thousands of documents,
+you will usually present only ten or twenty results to the user at a time.
+This can be most easily accomplished
+by limiting your query with a LIMIT
+and an OFFSET clause —
+to display results ten at a time, for example,
+you would generate your first page of results
+with LIMIT 10 OFFSET 0,
+your second page
+with LIMIT 10 OFFSET 10,
+your third page
+with LIMIT 10 OFFSET 20,
+and so forth.
+There are two problems with this approach, however.
+
+The first problem is the strain of running the query over again
+for every page of results the user views.
+For small document collections or lightly loaded servers,
+this may not be a problem;
+but the impact can be high
+when a search must repeatedly rank and sort
+the same ten thousand results
+on an already busy server.
+So instead of selecting only one page of results,
+you will probably use LIMIT and OFFSET
+to return a few dozen or few hundred results,
+which you can cache and display to the user one page at a time.
+Whether a result cache rewards your effort
+will depend principally on the behavior of your users —
+how often they even view the second page of results, for instance.
+
+The second issue solved by caching involves consistency.
+If the database is changing while the user browses their results,
+then documents might appear and disappear as they page through them.
+In some cases the user might even miss a particular result —
+perhaps the one they were looking for —
+if, say, its rank improves from 31st to 30th
+after they load results 21–30 but before they view results 31–40.
+While many databases are static or infrequently updated,
+and will not present this problem,
+users searching very dynamic document collections
+might benefit from the stable results that caches yield.
+
+

+Having seen the features of a search engine
+implemented entirely within the database,
+we will learn about some specific tsearch2 features.
+First we will look in more detail at document ranking.
+
+
+Ranking and Position Weights
+
+

+When we built our simple search engine,
+we used the rank() function to order our results.

+Here we describe tsearch2 ranking in more detail.
+
+
+There are two functions with which tsearch2 can rank search results.
+They both use the lexeme positions listed in the tsvector,
+so you cannot rank vectors
+from which these have been removed with strip().
+The rank() function existed in older versions of OpenFTS,
+and has the feature that you can assign different weights
+to words from different sections of your document.
+The rank_cd() function uses a recent technique for weighting results
+but does not allow different weights to be given
+to different sections of your document.
+
+Both ranking functions allow you to specify,
+as an optional last argument,
+whether you want their results normalized —
+whether the rank returned should be adjusted for document length.
+Specifying a last argument of 0 (zero) makes no adjustment;
+1 (one) divides the document rank
+by the logarithm of the document length;
+and 2 divides it by the plain length.
+In all of these examples we omit this optional argument,
+which is the same as specifying zero —
+we are making no adjustment for document length.
+
+The rank_cd() function uses an experimental measurement
+called cover density ranking that rewards documents
+when they make frequent use of the search terms
+that are close together in the document.
+You can read about the algorithm in more detail
+in Clarke et al.,
+“Relevance Ranking for One to Three Term Queries.”
+An optional first argument allows you to tune their formula;
+for details
+see the section on ranking
+in the Reference.
+
+The rank() function offers more flexibility
+because it pays attention to the weights
+with which you have labelled lexeme positions.
+Currently tsearch2 supports four different weight labels:
+'D', the default weight;
+and 'A', 'B', and 'C'.
+All vectors created with to_tsvector()
+assign the weight 'D' to each position,
+which as the default is not displayed when you print a vector out.
+
+If you want positions with weights other than 'D',
+you have two options:
+either you can author a vector directly through the ::tsvector
+casting operation,
+as described in the following section,
+which lets you give each position whichever weight you want;
+or you can pass a vector through the setweight() function
+which sets all of its position weights to a single value.
+An example of the latter:
+
+
+
+=# SELECT vector FROM docs WHERE id = 3
+                 vector                 
+----------------------------------------
+ 'low':8 'cobbl':5 'crawl':3 'passag':9
+(1 row)
+=# SELECT setweight(vector, 'A') FROM docs WHERE id = 3
+                 setweight                  
+--------------------------------------------
+ 'low':8A 'cobbl':5A 'crawl':3A 'passag':9A
+(1 row)
+
+
+
+Merely changing all of the weights in a vector is not very useful,
+of course,
+since this still results in all words having the same weight.
+But if we parse different parts of a document separately,
+giving each section its own weight,
+and then concatenate the vectors of each part into a single vector,
+the result can be very useful.
+We can construct a simple example
+in which document titles are given greater weight
+than text in the body of the document:
+
+
+
+=# CREATE TABLE tdocs ( id SERIAL, title TEXT, doc TEXT, vector tsvector )
+=# CREATE INDEX tdocs_index ON tdocs USING gist(vector);
+=# CREATE FUNCTION instdoc(text, text) RETURNS void LANGUAGE sql AS

+  'INSERT INTO tdocs (title, doc, vector)
+   VALUES ($1, $2, setweight(to_tsvector($1), ''A'') || to_tsvector($2));'
+
+
+
+Now words from a document title will be weighted differently
+than those in the main text
+if we provide the title and body as separate arguments:
+
+
+
+=# SELECT instdoc('Spendid Chamber',

+ 'The walls are frozen rivers of orange stone.')
+ instdoc 
+---------
+ 
+(1 row)
+=# SELECT vector FROM tdocs
+                                    vector                                    
+------------------------------------------------------------------------------
+ 'wall':4 'orang':9 'river':7 'stone':10 'frozen':6 'chamber':2A 'spendid':1A
+(1 row)
+
+
+
+Note that although the necessity is unusual,
+you can constrain search terms
+to only match words from certain sections
+by following them with a colon
+and a list of the sections in which the word can occur;
+by default this list is 'ABCD'
+so that search terms match words from all sections.
+For example,
+here we search for a word both generally,
+and then looking only for specific weights:
+
+
+
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:A')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:D')
+ title | doc 
+-------+-----
+(0 rows)
+
+
+
+
+
+

+Our examples so far use tsearch2 to parse our documents into vectors.
+When your application needs absolute control over vector content,
+you will want to use direct type casting,
+which is described in the next section.
+
+
+Casting Vectors and Queries
+
+

+While tsearch2 has powerful and flexible ways
+to process documents and turn them into document vectors,
+you will sometimes want to parse documents on your own
+and place the results directly in vectors.
+Here we show you how.
+
+
+In the preceding examples,
+we used the to_tsvector() function
+when we needed a document's text reduced to a document vector.
+We saw that the function stripped whitespace and punctuation,
+eliminated common words,
+and altered suffixes to reduce words to a common form.
+While these operations are often desirable,
+and while in the sections below
+we will gain precise control over this process,
+there are occasions on which
+you want to avoid the changes that to_tsvector() makes to text
+and specify explicitly the words that you want in your vectors.
+Or you may want to create queries directly
+rather than through to_tsquery().
+
+For example,
+you may have already developed your own routine
+for reducing your documents to searchable lexemes,
+and do not want your carefully generated terms altered
+by passing them through to_tsvector().
+Or you might be developing and debugging parsing routines of your own
+that you are not ready to load into the database.
+In either case,
+you will find that direct insertion is easily accomplished
+if you simply follow some simple rules.
+
+Vectors are created directly
+when you cast a string of whitespace-separated lexemes
+to the tsvector type:
+
+
+
+=# select 'the only exit is the way you came in'::tsvector
+                     tsvector                     
+--------------------------------------------------
+ 'in' 'is' 'the' 'way' 'you' 'came' 'exit' 'only'
+(1 row)
+
+
+
+Notice that the conversion interpreted the string
+simply as a list of lexemes to be included in the vector.
+Their order was lost,
+as was the number of times each lexeme appeared.
+You must keep in mind that directly creating vectors with casting
+is not an alternate means of parsing;
+it is a way of directly entering lexemes into a vector without parsing.
+
+Queries can also be created through casting,
+if you separate lexemes with boolean operators
+rather than with whitespace.
+When creating your own vectors and queries,
+remember that the search operator @@
+finds only exact matches between query lexemes and vector lexemes
+—
+if they are not exactly the same string,
+they will not be considered a match.
+
+To include lexeme positions in your vector,
+write the positions exactly the way tsearch2 displays them
+when it prints vectors:
+by following each lexeme with a colon
+and a comma-separated list of integer positions.
+If you list a lexeme more than once,
+then all the positions listed for it are combined into a single list.
+For example,
+here are two ways of writing the same vector,
+depending on whether you mention ‘the’ twice
+or combine its positions into a list yourself:
+
+
+
+=# select 'the:1 only:2 exit:3 is:4 the:5 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+=# select 'the:1,5 only:2 exit:3 is:4 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+
+
+
+Things can get slightly tricky
+if you want to include apostrophes, backslashes, or spaces
+inside your lexemes
+(wanting to include either of the latter would be unusual,
+but they can be included if you follow the rules).
+The main problem is that the apostrophe and backslash
+are important both to PostgreSQL when it is interpreting a string,
+and to the tsvector conversion function.
+You may want to review section
+1.1.2.1,
+“String Constants”
+in the PostgreSQL documentation before proceeding.
+
+When you cast strings directly into vectors:
+
+The string is interpreted as a whitespace-separated list of lexemes,
+ any of which can be suffixed with a colon and a list of positions.
+A lexeme can be quoted by preceding it with an apostrophe,
+ in which case it runs until the next apostrophe;
+ otherwise a lexeme ends with the first whitespace or colon encountered.
+Any character preceded by a backslash,
+ including whitespace, the apostrophe, the colon, and the backslash itself,
+ loses its normal meaning and is treated as a letter.
+ Backslashes are effective
+ both inside and outside of apostrophe-quoted lexemes.
+A lexeme can be suffixed with a list of positions
+ by appending a colon and a comma-separated list of integers,
+ each of which can itself be followed by a letter
+ to designate a position weight
+ (position weights are described below).
+
+
+Here are some example strings,
+showing the lexeme you want to insert
+together with the string that the ::tsvector operator
+needs to see,
+and how you would type that string at the PostgreSQL prompt:
+
+
+
+For the lexeme...
+you need the string...
+which you can type as:
+
+nugget
+nugget
+'nugget'
+
+won't
+won't
+'won''t'
+
+pinin'
+pinin'
+'pinin'''
+
+'bout
+\'bout
+'\\''bout'
+
+white mist
+white\ mist
+'white\\ mist'
+
+or:
+'white mist'
+'''white mist'''
+
+won't budge
+won\'t\ budge
+'won\\''t\\ budge'
+
+or:
+'won\'t budge'
+'''won\\''t budge'''
+
+back\slashed
+back\\slashed
+'back\\\\slashed'
+
+
+Remember to use the quoted quoting shown at the right
+only when typing in strings as part of a PostgreSQL query.
+If you are providing strings through a library
+that automatically quotes them
+or provides them in binary form to PostgreSQL,
+then you can use the strings in the middle instead —
+suitably quoted in the language you are using, of course.
+
+Position weights are described above
+and can be written exactly as they will be displayed
+when you select a weighted vector:
+
+
+=# select 'weighty:1,3A trivial:2B,4'::tsvector
+           tsvector            
+-------------------------------
+ 'trivial':2B,4 'weighty':1,3A
+(1 row)
+
+
+
+Note that if you are composing SQL queries
+in a scripting language like Perl or Python,
+that itself considers quotes and backslashes special,
+then you may have another quoting layer to deal with
+on top of the two layers already shown above.
+In such cases you may want to write a function
+that performs the necessary quoting for you.
+
+

+Having seen how to create vectors of your own,
+it is time to learn how the native tsearch2 parser
+reduces documents to vectors.
+
+
+Parsing and Lexing
+
+

+The previous section
+described how you can bypass the parser provided by tsearch2
+and populate your table of documents
+with vectors of your own devising.
+But for those interested in the native tsearch2 facilities,
+we present here an overview of how it goes about
+reducing documents to vectors.
+
+
+The to_tsvector() function reduces documents to vectors
+in two stages.
+First, a parser breaks the input document
+into short sequences of text called tokens.
+Each token is usually a word, space, or piece of punctuation,
+though some parsers return larger and more exotic items
+like HTML tags as single tokens.
+Each token returned by the parser
+is either discarded
+or passed to a dictionary that converts it into a lexeme.
+The resulting lexemes are collected into a vector and returned.
+
+The choice of which parser and dictionaries to_tsvector() should use
+is controlled by your choice of configuration.
+The tsearch2 module comes with several configurations,
+and you can define more of your own;
+in fact the creation of a new configuration is illustrated below,
+in the section on position weights.
+
+To learn about parsing in more detail,
+we will study this example:
+
+
+=# select to_tsvector('default',

+     'The walls extend upward for well over 100 feet.')
+                       to_tsvector                        
+----------------------------------------------------------
+ '100':8 'feet':9 'wall':2 'well':6 'extend':3 'upward':4
+(1 row)
+
+
+Unlike the to_tsvector() calls used in the above examples,
+this one specifies the 'default' configuration explicitly.
+When we called to_tsvector() in earlier examples
+with only one argument,
+it used the current configuration,
+which is chosen automatically based on your LOCALE
+if that locale is mentioned in the pg_ts_cfg table
+(which is shown under the first bullet in the description below).
+If your locale is not listed in the table,
+your attempts to use the current configuration will return:
+
+
+ERROR:  Can't find tsearch2 config by locale
+
+
+You can always change the current configuration manually
+by calling the set_curcfg() function
+described in the section on
+Configurations
+in the Reference.
+
+Each configuration serves as an index into two different tables:
+in pg_ts_cfg it determines
+which parser will break our text into tokens,
+and in pg_ts_cfgmap
+it directs each token to a dictionary for processing.
+The steps in detail are:
+
+
+
+First, our text is parsed,
+using the parser listed for our configuration in the pg_ts_cfg table.
+We are using the 'default' configuration,
+so the table tells us to use the 'default' parser:
+
+
+=# SELECT * FROM pg_ts_cfg WHERE ts_name = 'default'
+ ts_name | prs_name | locale 
+---------+----------+--------
+ default | default  | C
+(1 row)
+
+
+So our text will be parsed as though we had called:
+
+
+=# select * from parse('default',

+     'The walls extend upward for well over 100 feet.')
+
+
+This breaks the text into a list of tokens
+which are each labelled with an integer type:
+
+The₁♦₁₂
+walls₁♦₁₂
+extend₁♦₁₂
+upward₁♦₁₂
+for₁♦₁₂
+well₁♦₁₂
+over₁♦₁₂
+100₂₂♦₁₂
+feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')

+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and

+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+}

diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html

new file mode 100644 (file)

index 0000000..df0faa4


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-ref.html
@@ -0,0 +1,448 @@
+
+
+
+
+tsearch2 reference
+
+
+The tsearch2 Reference
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Reference documents the user types and functions
+of the tsearch2 module for PostgreSQL.
+An introduction to the module is provided
+by the tsearch2 Guide,
+a companion document to this one.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+Vectors and Queries
+
+Vectors and queries both store lexemes,
+but for different purposes.
+A tsvector stores the lexemes
+of the words that are parsed out of a document,
+and can also remember the position of each word.
+A tsquery specifies a boolean condition among lexemes.
+
+Any of the following functions with a configuration argument
+can use either an integer id or textual ts_name
+to select a configuration;
+if the option is omitted, then the current configuration is used.
+For more information on the current configuration,
+read the next section on Configurations.
+
+Vector Operations
+
+
+
+ to_tsvector( [configuration,]

+ document TEXT) RETURNS tsvector
+
+ Parses a document into tokens,
+ reduces the tokens to lexemes,
+ and returns a tsvector which lists the lexemes
+ together with their positions in the document.
+ For the best description of this process,
+ see the section on Parsing and Lexing
+ in the accompanying tsearch2 Guide.
+
+ strip(vector tsvector) RETURNS tsvector
+
+ Return a vector which lists the same lexemes
+ as the given vector,
+ but which lacks any information
+ about where in the document each lexeme appeared.
+ While the returned vector is thus useless for relevance ranking,
+ it will usually be much smaller.
+
+ setweight(vector tsvector, letter) RETURNS tsvector
+
+ This function returns a copy of the input vector
+ in which every location has been labelled
+ with either the letter
+ 'A', 'B', or 'C',
+ or the default label 'D'
+ (which is the default with which new vectors are created,
+ and as such is usually not displayed).
+ These labels are retained when vectors are concatenated,
+ allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+
+ vector1 || vector2
+
+ concat(vector1 tsvector, vector2 tsvector)

+ RETURNS tsvector
+
+ Returns a vector which combines the lexemes and position information
+ in the two vectors given as arguments.
+ Position weight labels (described in the previous paragraph)
+ are retained intact during the concatenation.
+ This has at least two uses.
+ First,
+ if some sections of your document
+ need to be parsed with different configurations than others,
+ you can parse them separately
+ and concatenate the resulting vectors into one.
+ Second,
+ you can weight words from some sections of your document
+ more heavily than those from others by:
+ parsing the sections into separate vectors;
+ assigning the vectors different position labels
+ with the setweight() function;
+ concatenating them into a single vector;
+ and then providing a weights argument
+ to the rank() function
+ that assigns different weights to positions with different labels.
+
+ tsvector_size(vector tsvector) RETURNS INT4
+
+ Returns the number of lexemes stored in the vector.
+
+ text::tsvector RETURNS tsvector
+
+ Directly casting text to a tsvector
+ allows you to directly inject lexemes into a vector,
+ with whatever positions and position weights you choose to specify.
+ The text should be formatted
+ like the vector would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Query Operations
+
+
+
+ to_tsquery( [configuration,]

+ querytext text) RETURNS tsquery
+
+ Parses a query,
+ which should be single words separated by the boolean operators
+ “&” and,
+ “|” or,
+ and “!” not,
+ which can be grouped using parentheses.
+ Each word is reduced to a lexeme using the current
+ or specified configuration.
+
+
+ querytree(query tsquery) RETURNS text
+
+ This might return a textual representation of the given query.
+
+ text::tsquery RETURNS tsquery
+
+ Directly casting text to a tsquery
+ allows you to directly inject lexemes into a query,
+ with whatever positions and position weight flags you choose to specify.
+ The text should be formatted
+ like the query would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Configurations
+
+A configuration specifies all of the equipment necessary
+to transform a document into a tsvector:
+the parser that breaks its text into tokens,
+and the dictionaries which then transform each token into a lexeme.
+Every call to to_tsvector() (described above)
+uses a configuration to perform its processing.
+Three configurations come with tsearch2:
+
+
+default — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the simple dictionary for all others.
+default_russian — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the ru_stem Russian Snowball dictionary for all others.
+simple — Processes both words and numbers
+ with the simple dictionary,
+ which neither discards any stop words nor alters them.
+
+
+The tsearch2 module initially chooses your current configuration
+by looking for your current locale in the locale field
+of the pg_ts_cfg table described below.
+You can manipulate the current configuration yourself with these functions:
+
+
+
+ set_curcfg( id INT | ts_name TEXT

+  ) RETURNS VOID
+
+ Set the current configuration used by to_tsvector
+ and to_tsquery.
+
+ show_curcfg() RETURNS INT4
+
+ Returns the integer id of the current configuration.
+
+
+
+Each configuration is defined by a record in the pg_ts_cfg table:
+
+create table pg_ts_cfg (
+   id      int not  null primary key,
+   ts_name     text not null,
+   prs_name    text not null,
+   locale      text
+);
+
+The id and ts_name are unique values
+which identify the configuration;
+the prs_name specifies which parser the configuration uses.
+Once this parser has split document text into tokens,
+the type of each resulting token —
+or, more specifically, the type's lex_alias
+as specified in the parser's lexem_type() table —
+is searched for together with the configuration's ts_name
+in the pg_ts_cfgmap table:
+
+create table pg_ts_cfgmap (
+   ts_name     text not null,
+   lex_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,lex_alias)
+);
+
+Those tokens whose types are not listed are discarded.
+The remaining tokens are assigned integer positions,
+starting with 1 for the first token in the document,
+and turned into lexemes with the help of the dictionaries
+whose names are given in the dict_name array for their type.
+These dictionaries are tried in order,
+stopping either with the first one to return a lexeme for the token,
+or discarding the token if no dictionary returns a lexeme for it.
+
+Parsers
+
+Each parser is defined by a record in the pg_ts_parser table:
+
+create table pg_ts_parser (
+   prs_id      int not null primary key,
+   prs_name    text not null,
+   prs_start   oid not null,
+   prs_getlexem    oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+);
+
+The prs_id and prs_name uniquely identify the parser,
+while prs_comment usually describes its name and version
+for the reference of users.
+The other items identify the low-level functions
+which make the parser operate,
+and are only of interest to someone writing a parser of their own.
+
+The tsearch2 module comes with one parser named default
+which is suitable for parsing most plain text and HTML documents.
+
+Each parser argument below
+must designate a parser with either an integer prs_id
+or a textual prs_name;
+the current parser is used when this argument is omitted.
+
+
+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID
+
+ Selects a current parser
+ which will be used when any of the following functions
+ are called without a parser as an argument.
+
+ CREATE FUNCTION lexem_type(

+  [ parser ]
+  ) RETURNS SETOF lexemtype
+
+ Returns a table which defines and describes
+ each kind of token the parser may produce as output.
+ For each token type the table gives the lexid
+ with which the parser will label each token of that type,
+ the alias which names the token type,
+ and a short description descr for the user to read.
+
+ CREATE FUNCTION parse(

+  [ parser, ] document TEXT
+  ) RETURNS SETOF lexemtype
+
+ Parses the given document and returns a series of records,
+ one for each token produced by parsing.
+ Each token includes a lexid giving its type
+ and a lexem which gives its content.
+
+
+Dictionaries
+
+Dictionaries take textual tokens as input,
+usually those produced by a parser,
+and return lexemes which are usually some reduced form of the token.
+Among the dictionaries which come installed with tsearch2 are:
+
+
+simple simply folds uppercase letters to lowercase
+ before returning the word.
+en_stem runs an English Snowball stemmer on each word
+ that attempts to reduce the various forms of a verb or noun
+ to a single recognizable form.
+ru_stem runs a Russian Snowball stemmer on each word.
+
+
+Each dictionary is defined by an entry in the pg_ts_dict table:
+
+CREATE TABLE pg_ts_dict (
+   dict_id     int not null primary key,
+   dict_name   text not null,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lemmatize  oid not null,
+   dict_comment    text
+);
+
+The dict_id and dict_name
+serve as unique identifiers for the dictionary.
+The meaning of the dict_initoption varies among dictionaries,
+but for the built-in Snowball dictionaries
+it specifies a file from which stop words should be read.
+The dict_comment is a human-readable description of the dictionary.
+The other fields are internal function identifiers
+useful only to developers trying to implement their own dictionaries.
+
+The argument named dictionary
+in each of the following functions
+should be either an integer dict_id or a textual dict_name
+identifying which dictionary should be used for the operation;
+if omitted then the current dictionary is used.
+
+
+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID
+
+ Selects a current dictionary for use by functions
+ that do not select a dictionary explicitly.
+
+ CREATE FUNCTION lexize(

+ [ dictionary, ] word text)
+ RETURNS TEXT[]
+
+ Reduces a single word to a lexeme.
+ Note that lexemes are arrays of zero or more strings,
+ since in some languages there might be several base words
+ from which an inflected form could arise.
+
+
+Ranking
+
+Ranking attempts to measure how relevant documents are to particular queries
+by inspecting the number of times each search word appears in the document,
+and whether different search terms occur near each other.
+Note that this information is only available in unstripped vectors —
+ranking functions will only return a useful result
+for a tsvector which still has position information!
+
+Both of these ranking functions
+take an integer normalization option
+that specifies whether a document's length should impact its rank.
+This is often desirable,
+since a hundred-word document with five instances of a search word
+is probably more relevant than a thousand-word document with five instances.
+The option can have the values:
+
+
+0 (the default) ignores document length.
+1 divides the rank by the logarithm of the length.
+2 divides the rank by the length itself.
+
+
+The two ranking functions currently available are:
+
+
+
+ CREATE FUNCTION rank(

+  [ weights float4[], ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS,
+ and offers the ability to weight word instances more heavily
+ depending on how you have classified them.
+ The weights specify how heavily to weight each category of word:
+ 
+ {D-weight, C-weight, B-weight, A-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(

+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or fewer.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+


diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b


--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | [email protected]
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | [email protected]
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 |  
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 |  
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 |  
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 |  
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+


diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496


--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+PG_CPPFLAGS =
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+
+include $(top_srcdir)/contrib/contrib-global.mk


diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7


--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility helps you create dictionaries for the contrib/tsearch2
+module. In particular, it has built-in support for Snowball stemmers.
+
+The programming API for tsearch2 dictionaries is described in the tsearch2
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument of the -p option must be *the same* as the name
+      of the stemming function in stem.c (without the _stem suffix).
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample portuguese words with the stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+   Note that the dictionary and its corresponding entry in the tsearch
+   configuration are now installed; you may modify the entry using
+   plain SQL commands, for example to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercase input word and remove it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please see the Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for the "Gendict tutorial" and additional information about dictionaries.
\ No newline at end of file


diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542


--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRL/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+dictname=
+stemmode=no
+verbose=no
+cfile=
+hfile=
+dir= 
+hasinit=no
+comment=
+prefix=
+
+while getopts n:c:C:h:d:p:vis opt
+do
+   case "$opt" in
+       v) verbose=yes;;
+       s) stemmode=yes;;
+       i) hasinit=yes;;
+       n) dictname="$OPTARG";;
+       c) cfile="$OPTARG";;
+       h) hfile="$OPTARG";;
+       d) dir="$OPTARG";;
+       C) comment="$OPTARG";;
+       p) prefix="$OPTARG";;
+       \?) usage;;
+   esac
+done
+
+[ ${#dictname} -eq 0 ] && usage
+
+dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'`
+
+if [ $stemmode = "yes" ] ; then 
+   [ ${#prefix} -eq 0 ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi 
+
+[ ${#dir}   -eq 0 ] && dir="dict_$dictname"
+
+if [ ${#comment} -eq 0 ]; then
+   comment=null
+else
+   comment="'$comment'"
+fi
+
+ofile=
+for f in $cfile
+do
+   f=` echo $f | sed 's#c$#o#'`
+   ofile="$ofile $f"
+done
+
+if [ $stemmode = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+if [ $verbose = "yes" ]; then
+   echo Dictname: "'"$dictname"'"
+   echo Snowball stemmer: $stemmode
+   echo Has init method: $hasinit
+   [ $stemmode = "yes" ] && echo Function prefix: $prefix 
+   echo Source files: $cfile
+   echo Header files: $hfile
+   echo Object files: $ofile
+   echo Comment: $comment
+   echo Directory: ../../$dir
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build directory...  '
+if [ ! -d ../../$dir ]; then
+   if ! mkdir ../../$dir ; then 
+       echo "Can't create directory ../../$dir"
+       exit 1
+   fi 
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+else
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+fi
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+   [ $verbose = "yes" ] && echo -n 'Copy source and header files...  '
+   if [ ${#cfile} -ne 0 ] ; then
+       if ! cp $cfile ../../$dir ; then 
+           echo "Cant cp all or one of files: $cfile"
+           exit 1
+       fi
+   fi
+   if [ ${#hfile} -ne 0 ] ; then 
+       if ! cp $hfile ../../$dir ; then 
+               echo "Cant cp all or one of files: $hfile"
+           exit 1
+       fi
+   fi
+   [ $verbose = "yes" ] && echo ok
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build sub-include header...  '
+echo -n > ../../$dir/subinclude.h 
+for i in $hfile
+do
+   echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+if  [ $stemmode = "yes" ] ; then 
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   [ $verbose = "yes" ] && echo -n 'Build dictinonary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else 
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi 
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n "Build README.$dictname...  "
+if  [ $stemmode = "yes" ] ; then
+   echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname
+else
+   echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname
+fi
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+


diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/*
+ * State of one Snowball dictionary, created by the init function below.
+ * NOTE(review): the generated SQL registers snb_lexize as the lexize
+ * function, so this layout presumably has to match the one tsearch2
+ * itself uses -- do not reorder the fields without checking.
+ */
+typedef struct {
+   struct SN_env *z;                   /* stemmer environment */
+   StopList    stoplist;               /* stop words; wordop is lowerstr */
+   int (*stem)(struct SN_env * z);     /* stemming function */
+} DictSnowball;
+
+
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * Init method of the dictionary.
+ *
+ * Argument 0 (text, may be NULL) is passed to readstoplist() to load the
+ * stop-word list.  Returns a pointer to a malloc'ed DictSnowball; raises
+ * an error if memory or the stemmer environment cannot be obtained.
+ */
+Datum 
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+       
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       freestoplist(&(d->stoplist));
+       /*
+        * d comes from malloc, so it is not released when elog(ERROR)
+        * aborts the call; free it explicitly to avoid a leak.
+        */
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+


diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+HASINIT /* State of the dictionary: only a stop-word list in this template. */
+HASINIT typedef struct {
+HASINIT    StopList    stoplist;
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT /*
+HASINIT  * Init method: allocates the dictionary state with malloc and, when
+HASINIT  * argument 0 (text) is not NULL, passes it to readstoplist() to load
+HASINIT  * the stop-word list.  Returns a pointer to the state.
+HASINIT  */
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT 
+HASINIT    if ( !d )
+HASINIT        elog(ERROR, "No memory");
+HASINIT    memset(d,0,sizeof(DictExample));
+HASINIT 
+HASINIT    d->stoplist.wordop=lowerstr;
+HASINIT    
+HASINIT    /* Your INIT code */
+HASINIT    
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+HASINIT        text       *in = PG_GETARG_TEXT_P(0);
+HASINIT        readstoplist(in, &(d->stoplist));
+HASINIT        sortstoplist(&(d->stoplist));
+HASINIT        PG_FREE_IF_COPY(in, 0);
+HASINIT    }
+HASINIT 
+HASINIT    PG_RETURN_POINTER(d);
+HASINIT }
+
+/*
+ * Lexize method.  Argument 0 is the dictionary state, argument 1 the
+ * input word (char pointer) and argument 2 its length.  Returns a
+ * palloc'ed, NULL-terminated array of char pointers: a copy of the word,
+ * or (with an init method) nothing for an empty word or a stop word.
+ */
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+   char    **res=palloc(sizeof(char*)*2);
+
+   /* Your dictionary's lexize code goes here */
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0]=NULL;
+HASINIT    } else 
+       /* unconditional when the marked lines above are removed */
+       res[0]=txt;
+   res[1]=NULL;
+
+   PG_RETURN_POINTER(res);
+}


diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842


--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+-- Template of the SQL install script of a generated dictionary.
+-- config.sh strips the line markers (HASINIT, NOINIT, ISSNOWBALL,
+-- NOSNOWBALL) that apply to the chosen dictionary type and blanks the
+-- marked lines that do not apply.
+SET search_path = public;
+BEGIN;
+
+HASINIT -- init method, only for dictionaries that have one
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+NOSNOWBALL -- own lexize method; Snowball wrappers use snb_lexize instead
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+-- Register the dictionary: name, init function (or null), init option,
+-- lexize function and comment.
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;


diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/* Key (as GISTTYPE *) of the pos-th GISTENTRY stored in the varlena vec. */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/* Sum of bits 0..7 of val, i.e. the number of bits set in one byte. */
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/*
+ * gtsvector_in: text input is not supported; always reports an error.
+ */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+    elog(ERROR, "Not implemented");
+
+    /* return statement kept for the function's Datum return type */
+    PG_RETURN_DATUM((Datum) 0);
+}
+
+/*
+ * gtsvector_out: text output is not supported; always reports an error.
+ */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+    elog(ERROR, "Not implemented");
+
+    /* return statement kept for the function's Datum return type */
+    PG_RETURN_DATUM((Datum) 0);
+}
+
+/*
+ * qsort comparator ordering int4 values ascending.
+ */
+static int
+compareint(const void *a, const void *b)
+{
+    int4        lhs = *((int4 *) a);
+    int4        rhs = *((int4 *) b);
+
+    if (lhs > rhs)
+        return 1;
+    if (lhs < rhs)
+        return -1;
+    return 0;
+}
+
+/*
+ * Sort the l-element array a in place and squeeze out duplicates.
+ *
+ * Returns the number of distinct values, which then occupy the front of
+ * the array in ascending order.
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+    int4       *ptr,
+               *res;
+
+    /*
+     * Nothing to do for an empty or one-element array.  The old test
+     * (l == 1) let l == 0 fall through and return 1, i.e. it reported one
+     * (uninitialized) element for an empty input.
+     */
+    if (l <= 1)
+        return l;
+
+    ptr = res = a;
+
+    qsort((void *) a, l, sizeof(int4), compareint);
+
+    /* res points at the last unique value kept so far */
+    while (ptr - a < l)
+        if (*ptr != *res)
+            *(++res) = *ptr++;
+        else
+            ptr++;
+    return res + 1 - a;
+}
+
+/*
+ * Build a signature from an array key: clear sign, then set the bit
+ * selected by HASH for every array element of a.
+ */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+    int4        nelem = ARRNELEM(a);
+    int4       *elem = GETARR(a);
+    int4       *stop = elem + nelem;
+
+    MemSet((void *) sign, 0, sizeof(BITVEC));
+    while (elem < stop)
+    {
+        HASH(sign, *elem);
+        elem++;
+    }
+}
+
+/*
+ * GiST compress method.
+ *
+ * Leaf entry (a tsvector): build an array key holding the crc32 of every
+ * word, sorted and without duplicates; if that key is larger than
+ * TOAST_INDEX_TARGET it is replaced by a signature key.
+ * Non-leaf signature entry that is not flagged ALLISTRUE: if every byte of
+ * the signature is 0xff, replace it by the short ALLISTRUE key.
+ * Any other entry is returned unchanged.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       /* one int4 slot per word of the tsvector */
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* release the detoasted copy, if one was made */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /* leave the entry as it is unless every signature byte is 0xff */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * GiST decompress method: detoast the key.  When detoasting produced a new
+ * copy, a fresh GISTENTRY pointing at that copy is returned; otherwise the
+ * incoming entry is returned as is.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+    GISTENTRY  *in = (GISTENTRY *) PG_GETARG_POINTER(0);
+    GISTTYPE   *detoasted = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(in->key));
+    GISTENTRY  *out;
+
+    /* key was not toasted: nothing to wrap */
+    if (detoasted == (GISTTYPE *) DatumGetPointer(in->key))
+        PG_RETURN_POINTER(in);
+
+    out = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+    gistentryinit(*out, PointerGetDatum(detoasted),
+                  in->rel, in->page,
+                  in->offset, detoasted->len, FALSE);
+
+    PG_RETURN_POINTER(out);
+}
+
+/* Half-open range [arrb, arre) of sorted int4 values to search in. */
+typedef struct
+{
+    int4       *arrb;
+    int4       *arre;
+}  CHKVAL;
+
+/*
+ * TS_execute callback for array keys: binary search for val->val in the
+ * sorted range described by checkval (a CHKVAL *).  Returns true if found.
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+    CHKVAL     *range = (CHKVAL *) checkval;
+    int4       *lo = range->arrb;
+    int4       *hi = range->arre;
+
+    /* the value, if present, always lies in [lo, hi) */
+    while (lo < hi)
+    {
+        int4       *mid = lo + (hi - lo) / 2;
+
+        if (*mid < val->val)
+            lo = mid + 1;
+        else if (*mid == val->val)
+            return (true);
+        else
+            hi = mid;
+    }
+
+    return (false);
+}
+
+/*
+ * TS_execute callback for signature keys: checkval is the signature; test
+ * the bit that val->val hashes to.
+ */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+    BITVECP     sign = (BITVECP) checkval;
+
+    return GETBIT(sign, HASHVAL(val->val));
+}
+
+/*
+ * GiST consistent method: can the key of entry (argument 0) satisfy the
+ * query (argument 1)?  An empty query never matches; an ALLISTRUE
+ * signature always matches; otherwise the query is evaluated by
+ * TS_execute against the signature bits or the array of word hashes.
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+    GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+    QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+    GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(entry->key);
+    CHKVAL      chkval;
+
+    if (!query->size)
+        PG_RETURN_BOOL(false);
+
+    if (!ISSIGNKEY(key))
+    {
+        /* array key: only leaf pages */
+        chkval.arrb = GETARR(key);
+        chkval.arre = chkval.arrb + ARRNELEM(key);
+        PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                                  (void *) &chkval, true,
+                                  checkcondition_arr));
+    }
+
+    if (ISALLTRUE(key))
+        PG_RETURN_BOOL(true);
+
+    PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                              (void *) GETSIGN(key), false,
+                              checkcondition_bit));
+}
+
+/*
+ * Merge key add into the signature sbase.  Returns 1 without touching
+ * sbase when add is an ALLISTRUE signature, otherwise 0 after OR-ing in
+ * add's signature bytes or hashing in its array elements.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+    int4        i;
+    BITVECP     sadd;
+
+    if (!ISSIGNKEY(add))
+    {
+        int4       *ptr = GETARR(add);
+
+        for (i = 0; i < ARRNELEM(add); i++)
+            HASH(sbase, ptr[i]);
+        return 0;
+    }
+
+    if (ISALLTRUE(add))
+        return 1;
+
+    sadd = GETSIGN(add);
+    LOOPBYTE(
+        sbase[i] |= sadd[i]
+    );
+    return 0;
+}
+
+
+/*
+ * GiST union method: build one signature key covering all entries of
+ * entryvec (argument 0) and store its size in *size (argument 1).  The
+ * result is the short ALLISTRUE key as soon as one entry is ALLISTRUE.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+    bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+    int        *size = (int *) PG_GETARG_POINTER(1);
+    int4        nentries = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+    int4        flag = SIGNKEY;
+    int4        len;
+    int4        i;
+    BITVEC      base;
+    GISTTYPE   *result;
+
+    MemSet((void *) base, 0, sizeof(BITVEC));
+    for (i = 0; i < nentries; i++)
+        if (unionkey(base, GETENTRY(entryvec, i)))
+        {
+            flag |= ALLISTRUE;
+            break;
+        }
+
+    len = CALCGTSIZE(flag, 0);
+    result = (GISTTYPE *) palloc(len);
+    result->len = len;
+    *size = len;
+    result->flag = flag;
+
+    /* an ALLISTRUE key carries no signature bytes */
+    if (!ISALLTRUE(result))
+        memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+    PG_RETURN_POINTER(result);
+}
+
+/*
+ * GiST same method: store in *result (argument 2) whether keys a and b are
+ * equal, and return that pointer.  b is treated as being of the same kind
+ * (signature or array) as a.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+    GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+    GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+    bool       *result = (bool *) PG_GETARG_POINTER(2);
+    int4        i;
+
+    if (!ISSIGNKEY(a))
+    {
+        /* array keys: equal length and equal elements */
+        int4        lena = ARRNELEM(a);
+        int4        lenb = ARRNELEM(b);
+        int4       *ptra = GETARR(a);
+        int4       *ptrb = GETARR(b);
+
+        *result = (lena == lenb);
+        for (i = 0; *result && i < lena; i++)
+            if (ptra[i] != ptrb[i])
+                *result = false;
+    }
+    else if (ISALLTRUE(a) || ISALLTRUE(b))
+    {
+        /* equal only if both are ALLISTRUE */
+        *result = (ISALLTRUE(a) && ISALLTRUE(b));
+    }
+    else
+    {
+        /* two ordinary signatures: compare byte by byte */
+        BITVECP     sa = GETSIGN(a);
+        BITVECP     sb = GETSIGN(b);
+
+        *result = true;
+        LOOPBYTE(
+            if (sa[i] != sb[i])
+            {
+                *result = false;
+                break;
+            }
+        );
+    }
+
+    PG_RETURN_POINTER(result);
+}
+
+/*
+ * Number of bits set in the signature sign.
+ */
+static int4
+sizebitvec(BITVECP sign)
+{
+    int4        i;
+    int4        total = 0;
+
+    LOOPBYTE(
+        total += SUMBIT(sign[i])
+    );
+    return total;
+}
+
+/*
+ * Hamming distance of two signatures: the number of bit positions in
+ * which a and b differ, counted bytewise from the XOR of the two.
+ */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+    int         i;
+    int         dist = 0;
+
+    LOOPBYTE(
+        dist += SUMBIT(a[i] ^ b[i])
+    );
+    return dist;
+}
+
+/*
+ * Hamming distance of two signature keys.  An ALLISTRUE key stands for a
+ * signature with every bit set, so its distance to an ordinary signature
+ * is the number of bits not set in that signature.
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+    if ( ISALLTRUE(a) && ISALLTRUE(b) )
+        return 0;
+    if ( ISALLTRUE(a) )
+        return SIGLENBIT-sizebitvec(GETSIGN(b));
+    if ( ISALLTRUE(b) )
+        return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+    return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * GiST penalty method: store in *penalty (argument 2) the cost of adding
+ * newentry to the subtree of origentry (always a signature key), measured
+ * as a Hamming distance, and return that pointer.
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+    GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+    GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+    float      *penalty = (float *) PG_GETARG_POINTER(2);
+    GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+    GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+    BITVEC      sign;
+
+    *penalty = 0.0;
+
+    if (!ISARRKEY(newval))
+        *penalty=hemdist(origval,newval);
+    else
+    {
+        /* array key: compare via its signature */
+        makesign(sign, newval);
+
+        if ( ISALLTRUE(origval) )
+            *penalty=((float)(SIGLENBIT-sizebitvec(sign)))/(float)(SIGLENBIT+1);
+        else
+            *penalty=hemdistsign(sign,GETSIGN(origval));
+    }
+
+    PG_RETURN_POINTER(penalty);
+}
+
+/* Signature of one entry, precomputed for picksplit. */
+typedef struct
+{
+    bool        allistrue;      /* entry is an ALLISTRUE key; sign unused */
+    BITVEC      sign;
+}  CACHESIGN;
+
+/*
+ * Fill item from key: array keys are hashed into a signature, ordinary
+ * signatures are copied, ALLISTRUE keys only set the flag.
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+    if (ISARRKEY(key))
+    {
+        item->allistrue = false;
+        makesign(item->sign, key);
+        return;
+    }
+
+    item->allistrue = ISALLTRUE(key) ? true : false;
+    if (!item->allistrue)
+        memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+}
+
+/* -((a-b)^3)*c as a double; used by picksplit to balance the two sides. */
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+
+/* An entry position together with its split cost. */
+typedef struct
+{
+    OffsetNumber pos;
+    int4        cost;
+} SPLITCOST;
+
+/*
+ * qsort comparator ordering SPLITCOST items by ascending cost.
+ */
+static int
+comparecost(const void *a, const void *b)
+{
+    int4        lhs = ((SPLITCOST *) a)->cost;
+    int4        rhs = ((SPLITCOST *) b)->cost;
+
+    if (lhs > rhs)
+        return 1;
+    if (lhs < rhs)
+        return -1;
+    return 0;
+}
+
+
+/*
+ * Hamming distance of two cached signatures; an allistrue item counts as a
+ * signature with every bit set.
+ */
+static int
+hemdistcache(CACHESIGN   *a, CACHESIGN   *b) {
+    if ( a->allistrue && b->allistrue )
+        return 0;
+    if ( a->allistrue )
+        return SIGLENBIT-sizebitvec(b->sign);
+    if ( b->allistrue )
+        return SIGLENBIT-sizebitvec(a->sign);
+
+    return hemdistsign( a->sign, b->sign );
+}
+
+/*
+ * GiST picksplit method: distribute the entries of entryvec (argument 0)
+ * over the left and right side of the GIST_SPLITVEC v (argument 1).
+ *
+ * 1. Cache a signature for every entry and pick as seeds the two entries
+ *    with the largest Hamming distance (the last entry is cached later and
+ *    does not take part in the seed search).
+ * 2. Start the left/right union keys from the two seeds.
+ * 3. Sort all entries by |distance to seed 1 - distance to seed 2|.
+ * 4. In that order, add each entry to the side whose current union is
+ *    nearer; WISH_F shifts the decision against the side that already
+ *    holds more entries.
+ *
+ * Returns v with spl_left/spl_right, their counts and the two union keys
+ * filled in.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+    bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+    GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+    OffsetNumber k,
+                j;
+    GISTTYPE   *datum_l,
+               *datum_r;
+    BITVECP     union_l,
+                union_r;
+    int4        size_alpha,
+                size_beta;
+    int4        size_waste,
+                waste = -1;
+    int4        nbytes;
+    OffsetNumber seed_1 = 0,
+                seed_2 = 0;
+    OffsetNumber *left,
+               *right;
+    OffsetNumber maxoff;
+    BITVECP     ptr;
+    int         i;
+    CACHESIGN  *cache;
+    SPLITCOST  *costvector;
+
+    maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+    nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+    v->spl_left = (OffsetNumber *) palloc(nbytes);
+    v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+    cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+    fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+    /* seed search; cache entries are filled during the first outer pass */
+    for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+        for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+            if (k == FirstOffsetNumber)
+                fillcache(&cache[j], GETENTRY(entryvec, j));
+
+            size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+            if (size_waste > waste) {
+                waste = size_waste;
+                seed_1 = k;
+                seed_2 = j;
+            }
+        }
+    }
+
+    left = v->spl_left;
+    v->spl_nleft = 0;
+    right = v->spl_right;
+    v->spl_nright = 0;
+
+    /* no pair was examined: fall back to the first two entries */
+    if (seed_1 == 0 || seed_2 == 0) {
+        seed_1 = 1;
+        seed_2 = 2;
+    }
+
+    /* form initial .. */
+    if (cache[seed_1].allistrue) {
+        datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+        datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+        datum_l->flag = SIGNKEY | ALLISTRUE;
+    } else {
+        datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+        datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+        datum_l->flag = SIGNKEY;
+        memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+    }
+    if (cache[seed_2].allistrue) {
+        datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+        datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+        datum_r->flag = SIGNKEY | ALLISTRUE;
+    } else {
+        datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+        datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+        datum_r->flag = SIGNKEY;
+        memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+    }
+
+    union_l=GETSIGN(datum_l);
+    union_r=GETSIGN(datum_r);
+    /* from here on maxoff includes the last entry, which is cached now */
+    maxoff = OffsetNumberNext(maxoff);
+    fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+    /* sort before ... */
+    costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+    for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+        costvector[j - 1].pos = j;
+        size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+        size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+        costvector[j - 1].cost = abs(size_alpha - size_beta);
+    }
+    qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+    for (k = 0; k < maxoff; k++) {
+        j = costvector[k].pos;
+        /* the seeds themselves always go to their own side */
+        if (j == seed_1) {
+            *left++ = j;
+            v->spl_nleft++;
+            continue;
+        } else if (j == seed_2) {
+            *right++ = j;
+            v->spl_nright++;
+            continue;
+        }
+
+        /*
+         * Distance of entry j to each union.  cache[j].sign already is the
+         * bare signature, so it must be used directly: wrapping it in
+         * GETSIGN() (as was done before) skips a key header that is not
+         * there and counts bits 8 bytes past the signature.
+         */
+        if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+            if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+                size_alpha=0;
+            else
+                size_alpha = SIGLENBIT-sizebitvec(
+                    ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign
+                );
+        } else {
+            size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+        }
+
+        if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+            if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+                size_beta=0;
+            else
+                size_beta = SIGLENBIT-sizebitvec(
+                    ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign
+                );
+        } else {
+            size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+        }
+
+        if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+            /* goes left: widen the left union */
+            if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+                if (! ISALLTRUE(datum_l) )
+                    MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+            } else {
+                ptr=cache[j].sign;
+                LOOPBYTE(
+                    union_l[i] |= ptr[i];
+                );
+            }
+            *left++ = j;
+            v->spl_nleft++;
+        } else {
+            /* goes right: widen the right union */
+            if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+                if (! ISALLTRUE(datum_r) )
+                    MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+            } else {
+                ptr=cache[j].sign;
+                LOOPBYTE(
+                    union_r[i] |= ptr[i];
+                );
+            }
+            *right++ = j;
+            v->spl_nright++;
+        }
+    }
+
+    /* terminate both offset lists */
+    *right = *left = FirstOffsetNumber;
+    pfree(costvector);
+    pfree(cache);
+    v->spl_ldatum = PointerGetDatum(datum_l);
+    v->spl_rdatum = PointerGetDatum(datum_r);
+
+    PG_RETURN_POINTER(v);
+}


diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+#define BITBYTE 8
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+#define LOOPBYTE(a) \
+       for(i=0;i
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i
+                               a;\
+               }
+
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+#define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ */
+typedef struct
+{
+   int4        len;
+   int4        flag;
+   char        data[1];
+}  GISTTYPE;
+
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+#define ISARRKEY(x) ( ((GISTTYPE*)x)->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)x)->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)x)->flag & ALLISTRUE )
+
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+#define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)x+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)x)->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "spell.h"
+
+#define MAXNORMLEN 56
+
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/* qsort() comparator: orders SPELL entries by their word using strcmp() */
+static int
+cmpspell(const void *s1, const void *s2)
+{
+   const SPELL *a = (const SPELL *) s1;
+   const SPELL *b = (const SPELL *) s2;
+
+   return strcmp(a->word, b->word);
+}
+
+/* Convert a NUL-terminated string to lower case in place, byte by byte */
+static void
+strlower(char *str)
+{
+   unsigned char *p;
+
+   for (p = (unsigned char *) str; *p != '\0'; p++)
+       *p = tolower(*p);
+}
+
+/*
+ * Backward string compare for suffix tree operations: the strings are
+ * compared starting from their last characters.  If one string runs out
+ * first, the shorter one sorts lower.
+ */
+static int
+strbcmp(const char *s1, const char *s2)
+{
+   int i1 = strlen(s1) - 1;
+   int i2 = strlen(s2) - 1;
+
+   for (; i1 >= 0 && i2 >= 0; i1--, i2--)
+   {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   if (i1 != i2)
+       return (i1 < i2) ? -1 : 1;
+   return 0;
+}
+/*
+ * Like strbcmp(), but compares at most 'count' trailing characters.
+ * Returns 0 as soon as 'count' characters have matched.
+ */
+static int
+strbncmp(const char *s1, const char *s2, size_t count)
+{
+   int i1 = strlen(s1) - 1;
+   int i2 = strlen(s2) - 1;
+   int left = count;
+
+   for (; i1 >= 0 && i2 >= 0 && left > 0; i1--, i2--, left--)
+   {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   if (left == 0)
+       return 0;
+   if (i1 != i2)
+       return (i1 < i2) ? -1 : 1;
+   return 0;
+}
+
+/*
+ * qsort() comparator for AFFIX entries: first by type, then by the
+ * replacement string -- forward (strcmp) for prefixes ('p'), backward
+ * (strbcmp) for everything else.
+ */
+static int
+cmpaffix(const void *s1, const void *s2)
+{
+   const AFFIX *a = (const AFFIX *) s1;
+   const AFFIX *b = (const AFFIX *) s2;
+
+   if (a->type != b->type)
+       return (a->type < b->type) ? -1 : 1;
+
+   return (a->type == 'p') ? strcmp(a->repl, b->repl)
+                           : strbcmp(a->repl, b->repl);
+}
+
+/*
+ * Append one dictionary entry to Conf->Spell.
+ *
+ * The array grows in steps of 1024*20 entries.  'word' is copied with
+ * strdup(); 'flag' (the affix flag letters) is copied into the fixed-size
+ * flag buffer of the entry and is truncated if it does not fit.
+ * Raises elog(ERROR) when memory cannot be allocated.  Always returns 0.
+ */
+int 
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   SPELL *entry;
+
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell"); 
+   }
+   entry = &Conf->Spell[Conf->nspell];
+   entry->word=strdup(word);
+   if ( !entry->word ) 
+       elog(ERROR,"No memory for AddSpell");
+   /*
+    * strncpy() does not terminate the destination when the source fills
+    * it completely, and the flag string is later scanned with strchr(),
+    * so always leave room for and write the terminating NUL.
+    */
+   strncpy(entry->flag,flag,sizeof(entry->flag)-1);
+   entry->flag[sizeof(entry->flag)-1]='\0';
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * Read a word list from 'filename' and add every line to Conf via AddSpell().
+ *
+ * A line is either "word" or "word/FLAGS".  FLAGS is cut at the first
+ * character that is not an ASCII letter.  The word part is lower-cased and
+ * stripped of CR/LF before it is stored.
+ * Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportDictionary(IspellDict * Conf,const char *filename){
+   unsigned char line[BUFSIZ];
+   FILE *fp;
+
+   fp=fopen(filename,"r");
+   if(fp==NULL)
+       return(1);
+
+   while(fgets(line,sizeof(line),fp)){
+       unsigned char *p;
+       const unsigned char *flag;
+
+       p=strchr(line,'/');
+       if(p==NULL){
+           flag="";
+       }else{
+           /* split "word/FLAGS" and keep only the leading letters of FLAGS */
+           *p++=0;
+           flag=p;
+           while(((*p>='A')&&(*p<='Z'))||((*p>='a')&&(*p<='z')))
+               p++;
+           *p=0;
+       }
+
+       /* only the word part is lower-cased: the line was cut at '/' above */
+       strlower(line);
+
+       /* drop line terminators */
+       for(p=line;*p;p++){
+           if(*p=='\r'||*p=='\n')
+               *p=0;
+       }
+
+       AddSpell(Conf,line,flag);
+   }
+   fclose(fp);
+   return(0);
+}
+
+
+/*
+ * Look up 'word' in the sorted Conf->Spell array.
+ *
+ * The search is limited to the index range recorded in Conf->SpellTree for
+ * the first byte of the word.  If 'affixflag' is non-zero, an entry matches
+ * only if its flag string contains that character.
+ * Returns a pointer to the matching entry or NULL.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int l,c,r,resc,resl,resr, i;
+
+   /* range of entries whose word starts with the same byte */
+   i = (int)(*word) & 255;
+   l = Conf->SpellTree.Left[i];
+   r = Conf->SpellTree.Right[i];
+   if (l == -1) return (NULL);
+   while(l<=r){
+       /* each step probes the middle and both ends of the current range */
+       c = (l + r) >> 1;
+       resc = strcmp(Conf->Spell[c].word, word);
+       if( (resc == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[c]);
+       }
+       resl = strcmp(Conf->Spell[l].word, word);
+       if( (resl == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[l]);
+       }
+       resr = strcmp(Conf->Spell[r].word, word);
+       if( (resr == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[r]);
+       }
+       /*
+        * Halve the range on the side indicated by the middle probe and
+        * also drop the already tested end on the other side.  When the
+        * middle word is equal but its flags did not match, only the two
+        * ends are dropped, so the remaining entries are still visited.
+        */
+       if(resc < 0){
+           l = c + 1;
+           r--;
+       } else if(resc > 0){
+           r = c - 1;
+           l++;
+       } else {
+           l++;
+           r--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * Append one affix rule to Conf->Affix.
+ *
+ * flag - affix flag character the rule belongs to
+ * mask - regular expression the candidate word has to match; it is
+ *        anchored here with '$' for suffixes and '^' for prefixes
+ * find - string stored in AFFIX.find
+ * repl - string stored in AFFIX.repl
+ * type - 's' for a suffix rule, anything else is treated as a prefix rule
+ *
+ * The AFFIX structure stores mask, find and repl in fixed-size buffers.
+ * A rule that does not fit is skipped with a warning and 1 is returned;
+ * otherwise the rule is stored and 0 is returned.
+ * Raises elog(ERROR) when memory cannot be allocated.
+ */
+int 
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   AFFIX *aff;
+
+   /* "+1" accounts for the anchor character added to the mask below */
+   if ( strlen(mask)+1 >= sizeof(Conf->Affix->mask) ||
+        strlen(find) >= sizeof(Conf->Affix->find) ||
+        strlen(repl) >= sizeof(Conf->Affix->repl) ) {
+       elog(WARNING,"Affix rule is too long, skipped");
+       return(1);
+   }
+
+   if(Conf->naffixes>=Conf->maffixes){
+       if(Conf->maffixes){
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }else{
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL ) 
+           elog(ERROR,"No memory for AddAffix");
+   }
+
+   aff = &Conf->Affix[Conf->naffixes];
+   if (type=='s') {
+       sprintf(aff->mask,"%s$",mask);
+   } else {
+       sprintf(aff->mask,"^%s",mask);
+   }
+   aff->compile = 1;   /* the regex is compiled lazily on first use */
+   aff->flag=flag;
+   aff->type=type;
+   
+   strcpy(aff->find,find);
+   strcpy(aff->repl,repl);
+   aff->replen=strlen(repl);
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy 'src' to 'dist' leaving out every space, tab and '-' character.
+ * 'dist' is NUL-terminated and returned.
+ */
+static char * 
+remove_spaces(char *dist,char *src){
+   char *out=dist;
+
+   for(;*src;src++){
+       switch(*src){
+           case ' ':
+           case '-':
+           case '\t':
+               break;
+           default:
+               *out++=*src;
+               break;
+       }
+   }
+   *out=0;
+   return(dist);
+}
+
+
+/*
+ * Read an affix file and add its rules to Conf via AddAffix().
+ *
+ * Recognized lines (keywords are matched case-insensitively):
+ *   "suffixes..." / "prefixes..."  select the kind of the following rules
+ *   "flag X"                       sets the flag for the following rules
+ *                                  (leading '*' and spaces are skipped)
+ *   "mask > find,repl" or "mask > repl"   a rule; '#' starts a comment
+ * Rule lines are lower-cased, and spaces, tabs and '-' are removed from
+ * each of the three parts.
+ * Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           /*
+            * strchr() also matches the terminating NUL of its first
+            * argument, so the end of the line must be tested explicitly
+            * to avoid running past the buffer.
+            */
+           while(*s && strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       /* rules are only meaningful inside a suffixes/prefixes section */
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* 'str' is reused as scratch space: it has already been parsed */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               /* "mask > repl": the single field is the replacement */
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+       
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+       
+   }
+   fclose(affix);
+       
+   return(0);
+}
+
+/*
+ * Sort Conf->Spell by word and build Conf->SpellTree: for every first byte
+ * of a word, Left[] holds the index of the first entry starting with that
+ * byte (-1 if there is none) and Right[] the index of the last one.
+ */
+void 
+SortDictionary(IspellDict * Conf){
+   int prev = -1;
+   int first;
+   size_t n;
+
+   qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+   for(n = 0; n < 256 ; n++ )
+       Conf->SpellTree.Left[n] = -1;
+
+   for(n = 0; n < Conf->nspell; n++) {
+       first = (int)(*(Conf->Spell[n].word)) & 255;
+       if (first != prev) {
+           /* a new run of words with this first byte starts here */
+           Conf->SpellTree.Left[first] = n;
+           prev = first;
+       }
+       Conf->SpellTree.Right[first] = n;
+   }
+}
+
+/*
+ * Sort Conf->Affix with cmpaffix() and build the prefix and suffix trees.
+ * Prefix rules are keyed by the first byte of 'repl', all other rules by
+ * the last byte of 'repl' (0 when 'repl' is empty).  For each key, Left[]
+ * and Right[] hold the first and last index of the rules with that key,
+ * or -1 if there is none.
+ */
+void 
+SortAffixes(IspellDict * Conf) {
+  int   lastPrefix = -1, lastSuffix = -1, key;
+  int   *last;
+  Tree_struct *tree;
+  AFFIX *aff; size_t n;
+  
+  if (Conf->naffixes > 1)
+    qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
+
+  for(n = 0; n < 256; n++) {
+      Conf->PrefixTree.Left[n] = Conf->PrefixTree.Right[n] = -1;
+      Conf->SuffixTree.Left[n] = Conf->SuffixTree.Right[n] = -1;
+  }
+
+  for(n = 0; n < Conf->naffixes; n++) {
+    aff = &(((AFFIX*)Conf->Affix)[n]);
+
+    /* pick the tree and the key byte for this rule */
+    if(aff->type == 'p') {
+      key = (int)(*(aff->repl)) & 255;
+      tree = &Conf->PrefixTree;
+      last = &lastPrefix;
+    } else {
+      key = (aff->replen) ? (int)(aff->repl[aff->replen-1]) & 255 : 0;
+      tree = &Conf->SuffixTree;
+      last = &lastSuffix;
+    }
+
+    if (*last != key) {
+      tree->Left[key] = n;
+      *last = key;
+    }
+    tree->Right[key] = n;
+  }
+}
+
+/*
+ * Try to apply one suffix rule to 'word' (of length 'len').
+ *
+ * *res receives the result of comparing the end of the word with
+ * Affix->repl.  If they match, the ending is replaced by Affix->find, and
+ * the result is returned as a palloc'ed string provided it matches the
+ * rule's mask and is found in the dictionary with the rule's flag.
+ * In every other case NULL is returned.  The mask is compiled on first use.
+ */
+static char * 
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) {
+  regmatch_t subs[2]; /* workaround for apache&linux */
+  char newword[2*MAXNORMLEN] = "";
+  int err;
+  
+  *res = strbncmp(word, Affix->repl, Affix->replen);
+  if (*res != 0)
+    return NULL;
+
+  /* build the candidate: word without the suffix, plus Affix->find */
+  strcpy(newword, word);
+  strcpy(newword+len-Affix->replen, Affix->find);
+
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return(NULL);
+    }
+    Affix->compile = 0;
+  }
+
+  err = regexec(&(Affix->reg),newword,1,subs,0);
+  if (err == 0 && FindWord(Conf, newword, Affix->flag))
+    return pstrdup(newword);
+
+  return NULL;
+}
+
+#define NS 1
+/* capacity (in pointers) of the result array of normal forms */
+#define MAX_NORM 512
+/*
+ * Try to apply one prefix rule to 'word'.
+ *
+ * If the word starts with Affix->repl, the prefix is replaced by
+ * Affix->find.  When the result matches the rule's mask, it is appended to
+ * the NULL-terminated result list (*cur points to the next free slot of
+ * 'forms') if it is found in the dictionary with the rule's flag; in
+ * addition the first suffix rule registered under key 'pi' in
+ * Conf->SuffixTree is tried on the result.
+ *
+ * Returns the strncmp() result of the prefix comparison when the prefix
+ * does not match (used by the caller to narrow its search), 0 otherwise.
+ */
+static int 
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur ) {
+  regmatch_t subs[NS*2];
+  char newword[2*MAXNORMLEN] = "";
+  int err, ls, res, lres;
+  size_t newlen;
+  AFFIX *CAffix = Conf->Affix;
+  
+  res = strncmp(word, Affix->repl, Affix->replen);
+  if (res != 0) {
+    return res;
+  }
+  /* candidate = Affix->find followed by the word without the prefix */
+  strcpy(newword, Affix->find);
+  strcat(newword, word+Affix->replen);
+
+  /* the mask is compiled on first use */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return (0);
+    }
+    Affix->compile = 0;
+  }
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    SPELL * curspell;
+
+    if((curspell=FindWord(Conf, newword, Affix->flag))){
+      /* keep one slot free for the terminating NULL */
+      if ((*cur - forms) < (MAX_NORM-1)) {
+   **cur =  pstrdup(newword);
+   (*cur)++; **cur = NULL;
+      }
+    } 
+    newlen = strlen(newword);
+    ls = Conf->SuffixTree.Left[pi];
+      if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
+   **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
+   if (**cur) {
+     (*cur)++; **cur = NULL;
+   }
+      }
+  }
+  return 0;
+}
+
+
+/*
+ * Find the normal (dictionary) forms of 'word'.
+ *
+ * The word is lower-cased in place.  Candidates are the word itself and
+ * the results of applying prefix and suffix rules; a candidate is accepted
+ * when it is present in the dictionary.
+ *
+ * Returns a palloc'ed, NULL-terminated array of palloc'ed strings holding
+ * at most MAX_NORM-1 forms, or NULL if the word is empty, longer than
+ * MAXNORMLEN, or has no normal form.
+ */
+char ** 
+NormalizeWord(IspellDict * Conf,char *word){
+/*regmatch_t subs[NS];*/
+size_t len;
+char ** forms;
+char **cur;
+AFFIX * Affix;
+int ri, pi, ipi, lp, rp, cp, ls, rs;
+int lres, rres, cres = 0;
+  SPELL *spell;
+
+   len=strlen(word);
+   /*
+    * An empty word has no last character: reading word[len-1] would be
+    * out of bounds, and a zero 'pi' would make the loop below endless.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return(NULL);
+
+   strlower(word);
+
+   forms=(char **) palloc(MAX_NORM*sizeof(char *));
+   cur=forms;*cur=NULL;
+
+   /* keys for the prefix tree (first byte) and suffix tree (last byte) */
+   ri = (int)(*word) & 255;
+   pi = (int)(word[len-1]) & 255;
+   Affix=(AFFIX*)Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if((spell = FindWord(Conf, word, 0))){
+       *cur=pstrdup(word);
+       cur++;*cur=NULL;
+   }
+
+   /* Find all other NORMAL forms of the 'word' */
+
+   /*
+    * Two passes over the suffix keys: 0 (the key under which rules with
+    * an empty 'repl' are filed) and pi (the last byte of the word).
+    */
+   for (ipi = 0; ipi <= pi; ipi += pi) {
+
+       /* check prefix */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp) {
+         /* probe the middle and both ends, then narrow the range */
+         cp = (lp + rp) >> 1;
+         cres = 0;
+         if ((cur - forms) < (MAX_NORM-1)) {
+       cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+         }
+         if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+       lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+         }
+         if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+       rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+         }
+         if (cres < 0) {
+       rp = cp - 1;
+       lp++;
+         } else if (cres > 0) {
+       lp = cp + 1;
+       rp--;
+         } else {
+       lp++;
+       rp--;
+         }
+       }
+
+       /* check suffix: every rule in the range is tried, from both ends */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       while (ls >= 0 && ls <= rs) {
+         if (  ((cur - forms) < (MAX_NORM-1)) ) {
+       *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+       if (*cur) {
+         cur++; *cur = NULL;
+       }
+         }
+         if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+       *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+       if (*cur) {
+         cur++; *cur = NULL;
+       }
+         }
+         ls++;
+         rs--;
+       } /* end while */
+     
+   } /* for ipi */
+
+   if(cur==forms){
+       pfree(forms);
+       return(NULL);
+   }
+   return(forms);
+}
+
+/*
+ * Release everything owned by the dictionary: compiled regular expressions
+ * of the affix rules, the words of all spell entries and both arrays.
+ * The structure is zeroed afterwards.
+ */
+void 
+FreeIspell (IspellDict *Conf) {
+  int i;
+  AFFIX *Affix = (AFFIX *)Conf->Affix;
+
+  for (i = 0; i < Conf->naffixes; i++) {
+    /* compile == 0 means the regex of this rule has been compiled */
+    if (Affix[i].compile == 0) {
+      regfree(&(Affix[i].reg));
+    }
+  }
+  /* the Spell array holds nspell entries (not naffixes) */
+  for (i = 0; i < Conf->nspell; i++) {
+   free( Conf->Spell[i].word );
+  }
+  free(Conf->Affix);
+  free(Conf->Spell);
+  memset( (void*)Conf, 0, sizeof(IspellDict) );
+  return;
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include 
+#include 
+
+/* one dictionary entry */
+typedef struct spell_struct {
+        char * word;        /* the word itself */
+        char flag[10];      /* affix flag characters of the word */
+} SPELL;
+
+/* one affix rule */
+typedef struct aff_struct {   
+        char flag;          /* affix flag character the rule belongs to */
+        char type;          /* rule kind character */
+        char mask[33];      /* regular expression pattern of the rule */
+        char find[16];      /* "find" string of the rule */
+        char repl[16];      /* "replace" string of the rule */
+        regex_t reg;        /* compiled form of a regular expression */
+        size_t replen;      /* length of repl */
+        char compile;       /* compilation state flag for reg */
+} AFFIX;
+
+/* a pair of index tables with one slot per byte value (0..255) */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+/* an ispell dictionary: affix rules, word list and their lookup tables */
+typedef struct {
+   int maffixes;       /* presumably the allocated size of Affix -- TODO confirm */
+   int naffixes;       /* presumably the number of rules in Affix -- TODO confirm */
+   AFFIX * Affix;      /* affix rules */
+
+   int nspell;         /* presumably the number of entries in Spell -- TODO confirm */
+   int mspell;         /* presumably the allocated size of Spell -- TODO confirm */
+   SPELL   *Spell;     /* dictionary words */
+   Tree_struct SpellTree;  /* lookup table for Spell */
+   Tree_struct PrefixTree; /* lookup table for prefix rules */
+   Tree_struct SuffixTree; /* lookup table for suffix rules */
+
+} IspellDict;
+
+char ** NormalizeWord(IspellDict * Conf,char *word);
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#define CS_WAITKEY 0
+#define CS_INKEY   1
+#define CS_WAITEQ  2
+#define CS_WAITVALUE   3
+#define CS_INVALUE 4
+#define CS_IN2VALUE    5
+#define CS_WAITDELIM   6
+#define CS_INESC   7
+#define CS_IN2ESC  8
+
+/*
+ * Return a palloc'ed, NUL-terminated copy of the first 'len' bytes of
+ * 'ptr' with backslash escapes resolved: a backslash is dropped and the
+ * character after it is taken literally.  A backslash that is the very
+ * last character has nothing to escape and is kept as is.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+   char *res=palloc(len+1), *cptr;
+   memcpy(res,ptr,len);
+   res[len]='\0';
+   /* un-escape in place: 'ptr' reads, 'cptr' writes */
+   cptr = ptr = res;
+   while(*ptr) {
+       /*
+        * Skip the backslash only if something follows it; otherwise the
+        * read pointer would step over the terminating NUL.
+        */
+       if ( *ptr == '\\' && *(ptr+1) ) 
+           ptr++;
+       *cptr=*ptr; ptr++; cptr++;
+   }
+   *cptr='\0';
+
+   return res;
+}
+
+/*
+ * Parse a configuration string of the form
+ *     key = value, key = "quoted value", ...
+ * into a palloc'ed array of Map entries stored in *m.  The array is
+ * zero-filled, so the entry after the last parsed pair has NULL fields.
+ *
+ * Keys consist of alphabetic characters.  A value is either enclosed in
+ * double quotes or runs up to the next whitespace or comma; a backslash
+ * escapes the following character in both forms.
+ * Raises elog(ERROR) on a syntax error.
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+   Map *mptr;
+   char *ptr=VARDATA(in), *begin=NULL;
+   int num=0;  /* number of commas: an int, since a char counter can overflow */
+   int state=CS_WAITKEY;
+
+   /* every pair but the last is followed by a comma: upper bound of pairs */
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if ( *ptr==',' ) num++;
+       ptr++;
+   }
+
+   *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+   memset(mptr, 0, sizeof(Map)*(num+2) );
+   ptr=VARDATA(in);
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       /* ctype functions need an unsigned char value */
+       if (state==CS_WAITKEY) {
+           if (isalpha((unsigned char) *ptr)) {
+               begin=ptr;
+               state=CS_INKEY;
+           } else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int)(ptr-VARDATA(in)), *ptr);
+       } else if (state==CS_INKEY) {
+           if ( isspace((unsigned char) *ptr) ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITEQ;
+           } else if ( *ptr=='=' ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITVALUE;
+           } else if ( !isalpha((unsigned char) *ptr) ) 
+               elog(ERROR,"Syntax error in position %d near '%c'", (int)(ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITEQ ) {
+           if ( *ptr=='=' )
+               state=CS_WAITVALUE;
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int)(ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITVALUE ) {
+           if ( *ptr=='"' ) {
+               begin=ptr+1;
+               state=CS_INVALUE;
+           } else if ( !isspace((unsigned char) *ptr) ) {
+               begin=ptr;
+               state=CS_IN2VALUE;
+           }
+       } else if ( state==CS_INVALUE ) {
+           /* inside a quoted value */
+           if ( *ptr=='"' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_INESC;
+       } else if ( state==CS_IN2VALUE ) {
+           /* inside an unquoted value */
+           if ( isspace((unsigned char) *ptr) || *ptr==',' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               /* must come back to CS_IN2VALUE, not to the quoted state */
+               state=CS_IN2ESC;
+       } else if ( state==CS_WAITDELIM ) {
+           if ( *ptr==',' ) 
+               state=CS_WAITKEY; 
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int)(ptr-VARDATA(in)), *ptr);
+       } else if ( state == CS_INESC ) {
+           /* the escaped character is consumed here; nstrdup() resolves it */
+           state=CS_INVALUE;
+       } else if ( state == CS_IN2ESC ) {
+           state=CS_IN2VALUE;
+       } else 
+           elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int)(ptr-VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may be terminated by the end of the input */
+   if (state==CS_IN2VALUE) {
+       mptr->value = nstrdup(begin, ptr-begin);
+       mptr++;
+   } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) ) 
+       elog(ERROR,"Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery input is parsed with the text parser
+ * and morphology is applied as well.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored in a flat form: in the array of nodes the
+ * right child is always the next item and the left child is at item+item->left
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR   2
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ *
+ * Nodes are built by pushquery(), which links every new node in front of
+ * the list, so 'next' points to the node pushed before this one.
+ */
+typedef struct NODE
+{
+   int2        weight;     /* weight argument given to pushquery() */
+   int2        type;       /* type argument given to pushquery() */
+   int4        val;        /* val argument given to pushquery() */
+   int2        distance;   /* distance argument given to pushquery() */
+   int2        length;     /* lenval argument given to pushquery() */
+   struct NODE *next;
+}  NODE;
+
+/* state of the query parser */
+typedef struct
+{
+   char       *buf;        /* current position in the query string */
+   int4        state;      /* WAITOPERAND or WAITOPERATOR */
+   int4        count;      /* number of currently open parentheses */
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   int4        lenop;      /* allocated size of 'op' */
+   int4        sumlen;     /* total length of stored operands, NULs included */
+   char       *op;         /* buffer of NUL-terminated operand strings */
+   char       *curop;      /* write position inside 'op' */
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional weight suffix of an operand, e.g. ":ab".
+ *
+ * If 'buf' starts with ':', the following letters a/b/c/d (in any case)
+ * set bits 3/2/1/0 of *weight; parsing stops at the first other character.
+ * Without a leading ':' nothing is consumed and *weight is 0.
+ * Returns the position following the consumed text.
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+   *weight = 0;
+
+   if ( *buf != ':' )
+       return buf;
+
+   buf++;
+   while( *buf ) {
+       /* tolower() needs an unsigned char value: plain char may be negative */
+       switch(tolower((unsigned char) *buf)) {
+           case 'a': *weight |= 1<<3; break; 
+           case 'b': *weight |= 1<<2; break; 
+           case 'c': *weight |= 1<<1; break; 
+           case 'd': *weight |= 1;    break;
+           default: return buf; 
+       }
+       buf++;
+   }
+   
+   return buf;
+}
+
+/*
+ * get token from query string
+ *
+ * Returns the token type: OPR (operator character stored in *val), OPEN,
+ * CLOSE, VAL (operand text in *strval / *lenval, its weight mask in
+ * *weight), END or ERR.  Spaces between tokens are skipped.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               /* '!' and '(' keep the parser waiting for an operand */
+               if (*(state->buf) == '!')
+               {
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* the operand itself is read by the tsvector value parser */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       /* continue after the operand and its optional weight */
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   /* more closing than opening parentheses is an error */
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* end of input with unclosed parentheses is an error */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       /* only reached for characters that produced no token */
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Push a new item onto the list that holds the query in reverse polish
+ * notation.  Raises elog(ERROR) if 'distance' or 'lenval' is not below
+ * its limit (MAXSTRPOS / MAXSTRLEN).
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   node->weight = weight;
+   node->type = type;
+   node->val = val;
+
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+
+   node->distance = distance;
+   node->length = lenval;
+
+   /* the new node becomes the head of the list */
+   node->next = state->str;
+   state->str = node;
+   state->num++;
+}
+
+/*
+ * Push an operand exactly as written (used for tsquery parsing): a node
+ * with the CRC32 of the operand is pushed and the operand text is appended,
+ * NUL-terminated, to the operand buffer of the parser state.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   int4        used;
+
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             state->curop - state->op, lenval, weight);
+
+   /* double the buffer until the operand and its NUL fit */
+   for (;;)
+   {
+       used = state->curop - state->op;
+       if (used + lenval + 1 < state->lenop)
+           break;
+
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+       state->curop = state->op + used;
+   }
+
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop += lenval;
+   *(state->curop) = '\0';
+   state->curop++;
+   state->sumlen += lenval + 1;
+}
+
+/*
+ * This function is used for morph parsing
+ *
+ * The operand text is run through parsetext_v2() with the configuration
+ * of the parser state.  Every resulting word is pushed as a value; from
+ * the second word on, an '&' operator is pushed after it, so the words
+ * are AND-ed together.  If no word is produced, a VALTRUE item is pushed
+ * instead.
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT         prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       /* pushval_asis() has copied the word, the original can go */
+       pfree( prs.words[count].word );
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }   
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 ) 
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+/* capacity of the operator stack of one makepol() invocation */
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens with gettoken_query() and pushes them with pushquery() /
+ * 'pushval'.  Operators are held back on a local stack until their operand
+ * has been pushed.  A parenthesized sub-expression is handled by a
+ * recursive call that returns at the matching CLOSE token.
+ * Returns END on success; a syntax error is raised with elog(ERROR).
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* the operand is complete: emit the pending '&' and '!' */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* parse the sub-expression up to its closing parenthesis */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* end of this sub-expression: flush all pending operators */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush all pending operators */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/* data passed to checkcondition_str() while a query is executed */
+typedef struct
+{
+   WordEntry  *arrb;       /* first word entry of the tsvector */
+   WordEntry  *arre;       /* end of the word entries (one past the last) */
+   char       *values;     /* string area of the tsvector */
+   char       *operand;    /* operand strings of the query */
+}  CHKVAL;
+
+/*
+ * Compare a word of the tsvector with an operand of the query.
+ * Strings of different length are ordered by length; strings of equal
+ * length are compared with strncmp().
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(
+                  &(chkval->values[ptr->pos]),
+                  &(chkval->operand[item->distance]),
+                  item->length);
+}
+
+/*
+ * check weight info
+ *
+ * The position data of a word follows its (short-aligned) text: a uint16
+ * counter and then that many WordEntryPos items.  Returns true if at
+ * least one position carries a weight selected by the weight mask of the
+ * query item.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       /* item->weight is a bit mask, ptr->weight a weight number */
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false; 
+}
+
+/*
+ * Is the query operand 'val' present in the word array of the tsvector?
+ * Binary search over [arrb, arre).  When the operand carries a weight mask
+ * and the found word has positions, the weights have to match as well.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *chk = (CHKVAL *) checkval;
+   WordEntry  *lo = chk->arrb;
+   WordEntry  *hi = chk->arre;
+
+   /* Loop invariant: lo <= val < hi */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = ValCompare(chk, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+       {
+           if (val->weight && mid->haspos)
+               return checkclass_str(chk, mid, val);
+           return true;
+       }
+   }
+
+   return (false);
+}
+
+/*
+ * Evaluate the boolean query (sub)tree rooted at curitem.
+ *
+ * curitem  - root item; an operator's first subtree starts at curitem + 1
+ *            and its other subtree at curitem + curitem->left
+ * checkval - opaque state passed through to chkcond
+ * calcnot  - when false, every '!' node evaluates to true without
+ *            looking at its operand
+ * chkcond  - callback deciding whether a VAL item matches
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   /* leaf: ask the caller-supplied predicate */
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       return TS_execute(curitem + 1, checkval, calcnot, chkcond) ? false : true;
+   }
+
+   if (curitem->val == (int4) '&')
+   {
+       /* short-circuit: the second subtree is only visited if the first holds */
+       if (!TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+           return false;
+       return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+   }
+
+   /* anything else is handled as the | operator */
+   if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+       return true;
+   return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+}
+
+/*
+ * boolean operations
+ *
+ * rexectsq: argument-swapped front end for exectsq().  Its argument 0 is
+ * passed on as exectsq's argument 1 and vice versa.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(0);
+   Datum       second = PG_GETARG_DATUM(1);
+
+   return DirectFunctionCall2(exectsq, second, first);
+}
+
+/*
+ * Does the tsvector in argument 0 satisfy the query in argument 1?
+ *
+ * Returns false outright when either value has size 0.  Otherwise the
+ * query tree is evaluated by TS_execute() with checkcondition_str() as
+ * the per-operand test and with '!' operators honoured (calcnot = true).
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   CHKVAL      chkval;
+   bool        result;
+
+   /* an empty vector or an empty query never matches */
+   if (!val->size || !query->size)
+   {
+       PG_FREE_IF_COPY(val, 0);
+       PG_FREE_IF_COPY(query, 1);
+       PG_RETURN_BOOL(false);
+   }
+
+   /* describe the vector's entry array and both string areas for the callback */
+   chkval.arrb = ARRPTR(val);
+   chkval.arre = chkval.arrb + val->size;
+   chkval.values = STRPTR(val);
+   chkval.operand = GETOPERAND(query);
+   result = TS_execute(
+                    GETQUERY(query),
+                    &chkval,
+                    true,
+                    checkcondition_str
+       );
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * find left operand in polish notation view
+ *
+ * Walks the subtree that starts at ptr[*pos] and fills in the 'left'
+ * field of every item in it; on return *pos is the index just past that
+ * subtree.  VAL/VALTRUE items get left = 0, a '!' operator gets left = 1,
+ * and any other operator gets the distance from itself to the start of
+ * its second subtree (the one following the subtree at offset 1).
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+   if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
+   {
+       /* operand: a leaf, nothing to link */
+       ptr[*pos].left = 0;
+       (*pos)++;
+   }
+   else if (ptr[*pos].val == (int4) '!')
+   {
+       /* unary operator: its single operand is the very next item */
+       ptr[*pos].left = 1;
+       (*pos)++;
+       findoprnd(ptr, pos);
+   }
+   else
+   {
+       ITEM       *curitem = &ptr[*pos];
+       int4        tmp = *pos;
+
+       (*pos)++;
+       /* skip over the first subtree; *pos then indexes the second one */
+       findoprnd(ptr, pos);
+       curitem->left = *pos - tmp;
+       findoprnd(ptr, pos);
+   }
+}
+
+
+/*
+ * input
+ *
+ * Parse the query text in buf into a freshly palloc'd QUERYTYPE.
+ *
+ * buf     - query text to parse
+ * pushval - callback handed to makepol() for pushing operands
+ * cfg_id  - stored in the parser state as state.cfg_id
+ *
+ * Raises elog(ERROR, "Empty query") when makepol() produced no items.
+ * The result holds state.num ITEMs followed by state.sumlen bytes of
+ * operand text, with every item's 'left' link set by findoprnd().
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   /* (pops the state.str linked list front to back, freeing each node) */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   /* NOTE(review): sprintf into the fixed 16384-byte pbuf is not length-checked */
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * in without morphology
+ *
+ * Parses argument 0 (taken as a char pointer) with queryin(), using
+ * pushval_asis as the operand callback and 0 as the configuration id.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0));
+}
+
+/*
+ * out function
+ *
+ * INFIX is the working state of infix(): a cursor into the item array
+ * plus a growable output buffer (grown by RESIZEBUF).
+ */
+typedef struct
+{
+   ITEM       *curpol;         /* next query item to print */
+   char       *buf;            /* start of the output buffer */
+   char       *cur;            /* current write position inside buf */
+   char       *op;             /* base of the query's operand strings */
+   int4        buflen;         /* allocated size of buf, in bytes */
+}  INFIX;
+
+/*
+ * Make sure the INFIX buffer *inf has room for addsize more bytes plus a
+ * terminating NUL, doubling buflen (and repalloc'ing buf) as often as
+ * needed.  cur is re-based onto the possibly moved buffer.  The macro
+ * arguments are parenthesized so that arbitrary expressions can be
+ * passed safely.
+ */
+#define RESIZEBUF(inf,addsize) \
+while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
+{ \
+   int4 len = (inf)->cur - (inf)->buf; \
+   (inf)->buflen *= 2; \
+   (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
+   (inf)->cur = (inf)->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the subtree starting at in->curpol into in->buf and advances
+ * in->curpol past it.  'first' suppresses the parentheses that are
+ * otherwise put around an '|' expression.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       /* worst case: every char escaped, two quotes, ':' plus four weight letters */
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+       *(in->cur) = '\'';
+       in->cur++;
+       /* copy the NUL-terminated operand, putting a backslash before each quote */
+       while (*op)
+       {
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       /* weight mask is printed as ':' followed by the letters of the set bits */
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       /* an operator under '!' is wrapped in parentheses */
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm;
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       /* the right operand comes first in the array: render it into a scratch buffer */
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function: render the query in argument 0 as a palloc'd,
+ * NUL-terminated infix string.  A query of size 0 yields the empty
+ * string.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+
+   /* NOTE(review): this early return skips PG_FREE_IF_COPY(query, 0) */
+   if (query->size == 0)
+   {
+       char       *b = palloc(1);
+
+       *b = '\0';
+       PG_RETURN_POINTER(b);
+   }
+   nrm.curpol = GETQUERY(query);
+   nrm.buflen = 32;
+   nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+   *(nrm.cur) = '\0';
+   nrm.op = GETOPERAND(query);
+   infix(&nrm, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(nrm.buf);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * Returns a text value: empty for a query of size 0, "T" when
+ * clean_NOT_v2() leaves nothing of the query, otherwise the infix
+ * rendering of the query with its NOT parts removed.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+   text       *res;
+   ITEM       *q;
+   int4        len;
+
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       PG_RETURN_POINTER(res);
+   }
+
+   q = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (!q)
+   {
+       /* nothing left after removing NOTs */
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+   else
+   {
+       int4        outlen;
+
+       nrm.curpol = q;
+       nrm.buflen = 32;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+       *(nrm.cur) = '\0';
+       nrm.op = GETOPERAND(query);
+       infix(&nrm, true);
+
+       /* text values are not NUL-terminated: copy exactly the bytes written */
+       outlen = nrm.cur - nrm.buf;
+       res = (text *) palloc(outlen + VARHDRSZ);
+       VARATT_SIZEP(res) = outlen + VARHDRSZ;
+       memcpy(VARDATA(res), nrm.buf, outlen);
+       pfree(nrm.buf);         /* scratch buffer is no longer needed */
+       pfree(q);
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * Build a query from text: argument 0 is an int4 configuration id,
+ * argument 1 the query text.  The text is parsed by queryin() with
+ * pushval_morph as operand callback and the item array is then passed
+ * through clean_fakeval_v2().  When that returns NULL, a query with
+ * size 0 is returned.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text    *in = PG_GETARG_TEXT_P(1);
+   char *str;
+   QUERYTYPE  *query;
+   ITEM       *res;
+   int4        len;
+
+   str=text2char(in);
+   PG_FREE_IF_COPY(in,1);
+
+   query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+   res = clean_fakeval_v2(GETQUERY(query), &len);
+   if (!res)
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+       PG_RETURN_POINTER(query);
+   }
+   /*
+    * Overwrite the leading items with the cleaned array (len items).
+    * NOTE(review): query->size and query->len are left unchanged here,
+    * and str is not pfree'd -- confirm that both are intended.
+    */
+   memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM));
+   pfree(res);
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * Variant of to_tsquery() that takes the configuration by name:
+ * argument 0 is the configuration name (text), argument 1 the query
+ * text.  The name is mapped to an id with name2id_cfg() and the work is
+ * delegated to to_tsquery().
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+
+   /*
+    * name was fetched from argument 0, so that is the slot it has to be
+    * compared against; comparing with slot 1 would free it even when it
+    * is not a detoasted copy.
+    */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * Variant of to_tsquery() that uses the configuration id returned by
+ * get_currcfg(); argument 0 is the query text.
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum cfg = Int32GetDatum( get_currcfg() );
+   Datum querytext = PG_GETARG_DATUM(0);
+
+   PG_RETURN_DATUM( DirectFunctionCall2(to_tsquery, cfg, querytext) );
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * item in polish notation with back link
+ * to left operand
+ */
+typedef struct ITEM
+{
+   int8        type;           /* one of the token codes defined below (VAL, OPR, ...) */
+   int8        weight;         /* bit mask of accepted position weights; 0 = no restriction */
+   int2        left;           /* offset from an operator to its other subtree (see TS_execute) */
+   int4        val;            /* for operators: the operator character ('!', '&', '|') */
+   /* user-friendly value, must correlate with WordEntry */
+   uint32  
+       unused:1,
+       length:11,              /* length of the operand string */
+       distance:20;            /* offset of the operand string inside the operand area */
+}  ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ */
+typedef struct
+{
+   int4        len;            /* total size in bytes, as computed by COMPUTESIZE */
+   int4        size;           /* number of ITEMs stored in data */
+   char        data[1];        /* ITEM array followed by the operand strings */
+}  QUERYTYPE;
+
+/* size of the fixed header (len + size) in front of the ITEM array */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+/* total bytes needed for 'size' items plus 'lenofoperand' bytes of operand text */
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+/* pointer to the ITEM array of query x */
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+/* pointer to the operand text that follows the ITEM array of query x */
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+/* true if character x is one of the query syntax characters */
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+/*
+ * Token / item type codes.  VAL, OPR and VALTRUE are stored in
+ * ITEM.type; END and ERR are returned by the query parser.
+ */
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+/*
+ * Evaluate a query tree; chkcond decides whether a single VAL item
+ * matches, calcnot says whether '!' operators are really evaluated.
+ */
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevance ranking
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include <math.h>
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+/* SQL-callable entry points defined in this file */
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+/* default per-weight multipliers, indexed by WordEntryPos.weight */
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+/* multiplier for a position; relies on a weight array named 'w' being in scope */
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+/* normalization method used when the caller does not pass one */
+#define DEF_NORM_METHOD    0
+
+/*
+ * Weight of a pair of words that are w positions apart.
+ * Distances above 100 get the fixed value 1e-30; otherwise the weight
+ * is 1 / (1.005 + 0.05 * exp(w / 1.5 - 2)).
+ */
+static float4 word_distance ( int4 w ) {
+   double exponent;
+
+   if ( w > 100 )
+       return 1e-30;
+
+   exponent = ((float4) w) / 1.5 - 2;
+   return 1.0 / ( 1.005 + 0.05 * exp(exponent) );
+}
+
+/*
+ * Length of the document behind a tsvector: the sum of the number of
+ * positions of every entry, where an entry without positions counts as 1.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry   *entry;
+   WordEntry   *stop = (WordEntry*) STRPTR(t);
+   int total = 0;
+
+   for (entry = ARRPTR(t); entry < stop; entry++) {
+       int npos = POSDATALEN(t, entry);
+
+       total += ( npos == 0 ) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Order a tsvector entry (string at eval + ptr->pos) against a query
+ * operand (string at qval + item->distance): by length first, then by
+ * strncmp() for strings of equal length.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+   if (ptr->len > item->length)
+       return 1;
+   if (ptr->len < item->length)
+       return -1;
+
+   return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary-search tsvector t for the entry whose string equals the query
+ * operand 'item' of query q.  Returns the entry, or NULL if the
+ * operand does not occur in t.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+   WordEntry  *lo = ARRPTR(t);
+   WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+   /* a match, if any, always lies in the half-open range [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+           return mid;
+   }
+
+   return NULL;
+}
+
+/*
+ * Stand-in position list for entries that carry no position data.
+ * Callers overwrite the first element with a uint16 count
+ * (lengthof(POSNULL)-1) so that the array has the same shape as real
+ * position data: a count followed by the positions.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank for a query whose top-level operator is '&'.
+ *
+ * For every pair of distinct query operands found in t, every pair of
+ * their positions contributes sqrt(weight1 * weight2 * word_distance(d)),
+ * where d is the distance between the two positions.  Contributions are
+ * combined as 1 - (1 - res) * (1 - cur).  Returns -1.0 if no pair
+ * contributed.
+ *
+ * w - weight multipliers indexed by position weight (used via wpos())
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* pos[i] stays NULL for operators and for operands missing from t */
+   memset(pos,0,sizeof(uint16*) * q->size);
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+
+       /* position data: uint16 count followed by the positions */
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       /* pair this operand with every earlier operand that was found */
+       for( k=0; k<i; k++) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   /* distance 0 only counts when one side has no real positions */
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw;
+                       if ( !dist ) dist=MAXENTRYPOS;
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res;
+}
+
+/*
+ * Rank for a query whose top-level operator is not '&'.
+ *
+ * Every position of every query operand found in t contributes its
+ * weight multiplier; contributions are combined as
+ * 1 - (1 - res) * (1 - cur).  Returns -1.0 if no operand was found.
+ *
+ * w - weight multipliers indexed by position weight (used via wpos())
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           /* no position data: use the stand-in list */
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Rank tsvector t against query q.
+ *
+ * w      - weight multipliers indexed by position weight
+ * method - normalization: 0 none, 1 divide by log(document length),
+ *          2 divide by document length; anything else raises an error
+ *
+ * Returns 0.0 when t or q is empty.  A negative result of the helper
+ * (nothing matched) is replaced by 1e-20 before normalization.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   /* only a top-level '&' selects the AND ranking */
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   if ( res < 0 )
+       res = 1e-20;
+
+   /* NOTE(review): with cnt_length(t) == 1, case 1 divides by log(1) == 0 */
+        switch(method) {
+       case 0: break;
+       case 1: res /= log((float)cnt_length(t)); break;
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+        }
+
+   return res;
+}
+
+/*
+ * rank(weights float4[], tsvector, query [, method int4])
+ *
+ * Ranks with caller-supplied weight multipliers.  The array must be
+ * one-dimensional and hold at least lengthof(weights) elements; a
+ * negative element selects the built-in default for that slot and a
+ * value above 1.0 is rejected.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 )
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+        elog(ERROR,"Array of weight is too short");
+
+   for(i=0;i<lengthof(weights);i++) {
+       /* negative means "use the default" */
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 )
+           elog(ERROR,"Weight out of range");
+   }
+
+   /* the normalization method is an optional fourth argument */
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method);
+
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank(tsvector, query [, method int4])
+ *
+ * Same as rank() but with the built-in default weight multipliers.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   float res=0.0;
+   int method=DEF_NORM_METHOD;
+
+   /* the normalization method is an optional third argument */
+   if ( PG_NARGS() == 3 )
+       method=PG_GETARG_INT32(2);
+
+   res=calc_rank(weights, txt, query, method); 
+       
+   PG_FREE_IF_COPY(txt, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * One occurrence of a query operand in the document: which operand it
+ * is and at which position it occurs.
+ */
+typedef struct {
+   ITEM    *item;
+   int32   pos;
+} DocRepresentation;
+
+/*
+ * qsort comparator: orders DocRepresentation elements by ascending pos.
+ * Equal positions compare as equal (0), as the qsort contract requires
+ * a consistent ordering.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * Window of the document representation handed to checkcondition_DR():
+ * 'len' consecutive elements starting at 'doc'.
+ */
+typedef struct {
+   DocRepresentation *doc;
+   int len;
+}  ChkDocR;
+
+/*
+ * TS_execute() callback: true if the query operand 'val' occurs
+ * anywhere inside the window described by checkval (a ChkDocR *).
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *window = (ChkDocR*) checkval;
+   int idx;
+
+   for (idx = 0; idx < window->len; idx++) {
+       if ( window->doc[idx].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next cover: a stretch of the document, starting at or after
+ * doc[*pos], that satisfies the query.
+ *
+ * doc, len - position-sorted occurrences of query operands (get_docrep)
+ * pos      - in/out: index in doc where the search starts / resumes
+ * p, q     - out: first and last word position of the cover found;
+ *            *q is also read on entry as the end of the previous cover
+ *
+ * Returns true with *p, *q set when a cover was found, false when
+ * there is none left.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* forward pass: *q = largest "first occurrence" position over all operands */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               }
+               break;
+           }
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   }
+
+   /* backward pass: *p = smallest "last occurrence" position inside [*pos, lastpos] */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+
+   if ( *p<=*q ) {
+       /* candidate window [f, doc + lastpos]; the next search resumes right after f */
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       /* '!' operators are not evaluated here (calcnot = false) */
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q);
+   }
+
+   return false;
+}
+
+/*
+ * Build the document representation used by Cover(): one element per
+ * occurrence of each query operand in txt, sorted by position.
+ *
+ * Stores the number of elements in *doclen.  Returns a palloc'd array,
+ * or NULL (with *doclen == 0) when no operand occurs in txt.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           /* no position data: use the stand-in list */
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until the new occurrences fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+
+   if ( cur>0 ) {
+       if ( cur>1 )
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd(K int4, tsvector, query [, method int4])
+ *
+ * Cover-based rank: every cover found by Cover() adds 1.0 when it spans
+ * at most K positions, otherwise K / span.  K <= 0 selects K = 4.
+ * Returns 0.0 when no query operand occurs in the tsvector.  The result
+ * is normalized by 'method' the same way as in calc_rank().
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len,cur;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;    
+   /* q-p+1 is the number of positions the cover spans */
+   while( Cover(doc, len, query, &cur, &p, &q) ) 
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   /* NOTE(review): with cnt_length(txt) == 1, case 1 divides by log(1) == 0 */
+        switch(method) {
+       case 0: break;
+       case 1: res /= log((float)cnt_length(txt)); break;
+       case 2: res /= (float)cnt_length(txt); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+        }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd(tsvector, query [, method int4])
+ *
+ * Calls rank_cd() with K = -1, which rank_cd() replaces by its default.
+ * The normalization method is forwarded when given as third argument,
+ * otherwise DEF_NORM_METHOD is passed.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   PG_RETURN_DATUM( DirectFunctionCall4(   
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       ( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD)
+   )); 
+}
+
+/**************debug*************/
+
+/*
+ * One word occurrence of the document as printed by get_covers().
+ * start/finish hold the number of the cover that begins/ends at this
+ * word, or 0 if none does.
+ */
+typedef struct {
+   char    *w;
+   int2    len;
+   int2    pos;
+   int2    start;
+   int2    finish;
+} DocWord;
+
+/*
+ * qsort comparator: orders DocWord elements by ascending pos.
+ * Equal positions compare as equal (0), as the qsort contract requires
+ * a consistent ordering.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers(tsvector, query) -- debug helper.
+ *
+ * Returns the words of the document in position order as text, with the
+ * covers found by Cover() marked as "{N " before the first word and
+ * "}N " after the last word of cover number N.  Returns empty text when
+ * no query operand occurs in the document, and raises an error if any
+ * tsvector entry lacks position information.
+ */
+Datum
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences of the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+        dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size (word + space) */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;
+           dw[cur].len=pptr[i].len;
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark the first and last word of every cover */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* test the bound before dereferencing dwptr */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++;
+   }
+
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) {
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/*
+ * Node of the pointer-based query tree built by maketree() from the
+ * flat ITEM array.
+ */
+typedef struct NODE
+{
+   struct NODE *left;          /* subtree at item + item->left; NULL for '!' and for operands */
+   struct NODE *right;         /* subtree at item + 1; NULL for operands */
+   ITEM       *valnode;        /* the ITEM this node stands for (not owned by the node) */
+}  NODE;
+
+/*
+ * Build a pointer-based tree from the flat (polish notation) item
+ * array starting at 'in'.  Every node is palloc'd; operands become
+ * leaves, '!' gets only a right child, other operators get both.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *result = (NODE *) palloc(sizeof(NODE));
+
+   result->left = NULL;
+   result->right = NULL;
+   result->valnode = in;
+
+   /* operands are leaves */
+   if (in->type != OPR)
+       return result;
+
+   result->right = maketree(in + 1);
+
+   /* unary NOT has no second operand */
+   if (in->val == (int4) '!')
+       return result;
+
+   result->left = maketree(in + in->left);
+   return result;
+}
+
+/* Growable ITEM array that plainnode() writes the flattened tree into */
+typedef struct
+{
+   ITEM       *ptr;            /* the array itself */
+   int4        len;            /* allocated number of ITEMs */
+   int4        cur;            /* number of ITEMs written so far */
+}  PLAINTREE;
+
+/*
+ * Append the subtree rooted at 'node' to state->ptr in polish notation
+ * (operator, right subtree, left subtree), recomputing the operators'
+ * 'left' offsets, and pfree every visited node.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   /* double the array when it is full */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+   memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM));
+   if (node->valnode->type == VAL)
+       state->cur++;
+   else if (node->valnode->val == (int4) '!')
+   {
+       state->ptr[state->cur].left = 1;
+       state->cur++;
+       plainnode(state, node->right);
+   }
+   else
+   {
+       /* remember the operator's slot by index: the array may move while recursing */
+       int4        cur = state->cur;
+
+       state->cur++;
+       plainnode(state, node->right);
+       state->ptr[cur].left = state->cur - cur;
+       plainnode(state, node->left);
+   }
+   pfree(node);
+}
+
+/*
+ * Flatten the tree 'root' back into a palloc'd ITEM array in polish
+ * notation and store the number of items in *len.  Returns NULL with
+ * *len == 0 when root is NULL or its item is neither VAL nor OPR.
+ * The tree nodes are freed by plainnode().
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   flat;
+
+   /* nothing to flatten */
+   if (!root || (root->valnode->type != VAL && root->valnode->type != OPR))
+   {
+       *len = 0;
+       return NULL;
+   }
+
+   flat.cur = 0;
+   flat.len = 16;
+   flat.ptr = (ITEM *) palloc(flat.len * sizeof(ITEM));
+   plainnode(&flat, root);
+
+   *len = flat.cur;
+   return flat.ptr;
+}
+
+/*
+ * Recursively pfree the tree rooted at 'node'; a NULL node is a no-op.
+ * The ITEMs the nodes point to are not touched.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* NULL children are handled by the guard above */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * clean tree for ! operator.
+ * It's usefull for debug, but in
+ * other case, such view is used with search in index.
+ * Operator ! always return TRUE
+ *
+ * Returns the cleaned subtree, or NULL when the whole subtree reduces
+ * to "always true".  Dropped nodes are freed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* OR with an always-true side is always true as a whole */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       NODE       *res = node;
+
+       /* AND: an always-true side is dropped and the other side takes the node's place */
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a palloc'd copy of the query items at ptr with all NOT parts
+ * removed (see clean_NOT_intree), storing the item count in *len.
+ * Returns NULL when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree = maketree(ptr);
+   NODE       *cleaned = clean_NOT_intree(tree);
+
+   return plaintree(cleaned, len);
+}
+
+/* constant-folding results reported by clean_fakeval_intree() through *result */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Clean query tree from values which is always in
+ * text (stopword)
+ *
+ * Returns the cleaned subtree.  When the subtree folds to a constant,
+ * NULL is returned and *result is set to V_TRUE or V_FALSE; otherwise
+ * *result is left untouched.  Dropped nodes are freed.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* NOT of a constant is the opposite constant */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           /* a false side of an OR is dropped; the other side replaces the node */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           /* a true side of an AND is dropped; the other side replaces the node */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a palloc'd copy of the query items at ptr with stop-word
+ * operands (VALTRUE) folded away, storing the item count in *len.
+ * When the whole query folds to a constant, a NOTICE is emitted, *len
+ * is set to 0 and NULL is returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *root = maketree(ptr);
+   char        result = V_UNKNOWN;
+   NODE       *resroot;
+
+   resroot = clean_fakeval_intree(root, &result);
+   if (result != V_UNKNOWN)
+   {
+       elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+       *len = 0;
+       return NULL;
+   }
+
+   return plaintree(resroot, len);
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len); /* NOTE(review): implementation is not visible from this header; presumably a query-rewrite pass like the one below -- confirm */
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len); /* returns the cleaned item array and stores its length in *len; returns NULL with *len = 0 when the whole query collapses to a constant */
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/*
+ * Comparator for qsort()/bsearch(): orders two SNMapEntry records by
+ * strcmp() of their keys.
+ */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *lhs = (const SNMapEntry *) a;
+   const SNMapEntry *rhs = (const SNMapEntry *) b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add a (key, value) pair to the map.
+ *
+ * The entry array grows with realloc(): 16 slots on first use, doubled each
+ * time it fills up.  The key is copied with strdup(), so the caller keeps
+ * ownership of its own string.  After the insert the whole array is
+ * re-sorted by key so that findSNMap() can use bsearch().
+ * Raises elog(ERROR, "No memory") when either allocation fails.
+ */
+void
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if ( map->len >= map->reallen ) {
+       SNMapEntry *grown;
+       int newcap = 16;
+
+       if ( map->reallen )
+           newcap = 2 * map->reallen;
+
+       grown = (SNMapEntry*) realloc(map->list, sizeof(SNMapEntry) * newcap);
+       if ( grown == NULL )
+           elog(ERROR, "No memory");
+       map->reallen = newcap;
+       map->list = grown;
+   }
+
+   /* fill the first free slot; len is bumped only once the key copy succeeded */
+   slot = &map->list[ map->len ];
+   slot->key = strdup(key);
+   if ( slot->key == NULL )
+       elog(ERROR, "No memory");
+   slot->value = value;
+   map->len++;
+
+   /* a single entry is trivially sorted */
+   if ( map->len > 1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Variant of addSNMap() taking the key as a text value: the key is converted
+ * with text2char(), inserted, and the temporary string is pfree()d again
+ * (addSNMap() keeps its own copy).
+ */
+void
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey;
+
+   ckey = text2char( key );
+   addSNMap( map, ckey, value );
+   pfree( ckey );
+}
+
+/*
+ * Look up key in the map with bsearch() over the key-sorted entry array.
+ * Returns the stored value, or 0 when the map is empty or has no such key.
+ */
+Oid
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if ( map->len==0 || !map->list )
+       return 0;
+
+   /* only the key of the probe entry is looked at by the comparator */
+   probe.key = key;
+   probe.value = 0;
+
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( hit == NULL )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Variant of findSNMap() taking the key as a text value.
+ *
+ * The key is converted with text2char(), looked up, and the temporary
+ * string is pfree()d before returning.  Returns the stored value, or 0 when
+ * the key is not present.
+ */
+Oid
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* was "int": keep the declared Oid type instead of passing the value through a signed int */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release everything owned by the map: each key copy, then the entry array
+ * itself.  The SNMap structure is zeroed afterwards, so it is empty again;
+ * the structure itself is not freed.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list != NULL ) {
+       SNMapEntry *cur;
+
+       /* walk the used entries, counting len down to zero as we go */
+       for ( cur = map->list; map->len != 0; cur++, map->len-- ) {
+           if ( cur->key != NULL )
+               free( cur->key );
+       }
+       free( map->list );
+   }
+   memset( map, 0, sizeof(SNMap) );
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+typedef struct { /* one entry of the string -> Oid map */
+   char    *key; /* strdup()ed copy made by addSNMap(), free()d by freeSNMap() */
+   Oid value; /* Oid stored for this key */
+} SNMapEntry;
+
+typedef struct { /* the map: a growable array of entries kept sorted by key */
+   int len; /* number of entries in use */
+   int reallen; /* number of entries allocated in list */
+   SNMapEntry  *list; /* realloc()ed array, re-sorted by addSNMap() after each insert */
+} SNMap;
+
+void addSNMap( SNMap *map, char *key, Oid value ); /* insert a pair; the key is copied */
+void addSNMap_t( SNMap *map, text *key, Oid value ); /* same, with a text key */
+Oid findSNMap( SNMap *map, char *key ); /* value for key, or 0 when not found */
+Oid findSNMap_t( SNMap *map, text *key ); /* same, with a text key */
+void freeSNMap( SNMap *map ); /* free keys and array, then zero the structure */
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate and initialise a stemmer environment.
+ *
+ *   S_size  number of string variables (each created with create_s())
+ *   I_size  number of integer variables (zero-initialised)
+ *   B_size  number of boolean variables (zero-initialised)
+ *
+ * Returns the new environment, or a null pointer when one of the calloc()
+ * calls fails.  The original code dereferenced the calloc() results without
+ * checking them.  On failure whatever was already built is released through
+ * SN_close_env(); this is safe because each *_size field is stored only
+ * after the matching array is completely set up.
+ *
+ * A literal 0 is used as the null pointer so that no extra header is needed.
+ * NOTE(review): create_s() results are still not checked; its failure
+ * behaviour is not visible from this file.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (!z) return 0;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (!z->S) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (!z->I) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (!z->B) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    /* sizes of arrays that were never allocated are still 0, so they are skipped */
+    SN_close_env(z);
+    return 0;
+}
+
+/*
+ * Release an environment created by SN_create_env().
+ *
+ * Frees the string variables (lose_s() on each, then the array), the
+ * integer and boolean arrays, the working buffer p and finally the
+ * structure itself.  An array is released only when its *_size field is
+ * non-zero.  A null z is accepted and ignored, like free(); the original
+ * dereferenced it unconditionally.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    int i;
+
+    if (!z) return; /* nothing to release */
+
+    if (z->S_size)
+    {
+        for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
+        free(z->S);
+    }
+    if (z->I_size) free(z->I);
+    if (z->B_size) free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+}
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{ /* Make the size symbols at s the current word of environment z. */
+    replace_s(z, 0, z->l, size, s); /* presumably replaces the span 0..z->l of the buffer with s -- confirm against replace_s() */
+    z->c = 0; /* put the cursor back at the start */
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+struct SN_env { /* state of one stemmer instance, shared by the runtime and the generated stemmers */
+    symbol * p; /* working buffer, created with create_s() and released with lose_s() */
+    int c; int a; int l; int lb; int bra; int ket; /* c: cursor; l: forward limit; lb: backward limit; bra/ket: ends of the current slice ("[" and "]" in the generated comments); a: not referenced in the code visible here */
+    int S_size; int I_size; int B_size; /* element counts of S, I and B */
+    symbol * * S; /* string variables, one create_s() buffer each */
+    int * I; /* integer variables */
+    symbol * B; /* boolean variables, stored as 0 / 1 */
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+static unsigned char g_v[] = { 17, 65, 16, 1 }; /* passed with range 97..121 ('a'..'y'); presumably a bitmap selecting a e i o u y -- TODO confirm against in_grouping() */
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; /* passed with range 89..121 ('Y'..'y'); presumably selects Y a e i o u w x y -- TODO confirm */
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 }; /* passed with range 99..116 ('c'..'t'); presumably selects c d e g h k m n r t -- TODO confirm */
+
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+static int r_prelude(struct SN_env * z) { /* Prelude (Snowball lines 24-26).
+     * Clears the Y_found flag B[0].  Line 25: when eq_s() matches 'y' at the
+     * cursor and in_grouping(g_v) then succeeds, the bracketed slice is
+     * rewritten to 'Y'.  Line 26: repeatedly scans forward for a position
+     * where in_grouping(g_v) succeeds and eq_s() then matches 'y', and
+     * rewrites that slice to 'Y'.  B[0] is set to 1 whenever a rewrite
+     * happens.  Each part restores the cursor it started with.
+     * Always returns 1.
+     */
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1:
+        z->c = c;
+    }
+    return 1;
+}
+
+static int r_mark_regions(struct SN_env * z) { /* Compute the marks p1 = I[0] and p2 = I[1].
+     * Both start at the limit z->l.  For p1: either find_among() matches
+     * table a_0 (the single entry "gener") at the cursor, or the cursor is
+     * advanced until in_grouping(g_v) succeeds and then until
+     * out_grouping(g_v) succeeds.  The same two-stage advance is repeated
+     * once more to obtain p2.  If the limit is reached during any scan, the
+     * marks not yet assigned keep the value z->l.  The cursor is restored
+     * before returning.  Always returns 1.
+     */
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+static int r_shortv(struct SN_env * z) { /* Backward test with two alternatives.
+     * First: out_grouping_b(g_v_WXY), then in_grouping_b(g_v), then
+     * out_grouping_b(g_v) all succeed.  Otherwise, restarting from the saved
+     * position: out_grouping_b(g_v) and in_grouping_b(g_v) succeed and the
+     * cursor has then reached the backward limit z->lb.
+     * Returns 1 when either alternative matches, 0 otherwise.
+     */
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* R1 test: returns 1 when the cursor z->c is at or beyond mark I[0]
+ * (assigned as p1 by r_mark_regions), 0 otherwise. */
+static int r_R1(struct SN_env * z) {
+    return (z->c >= z->I[0]) ? 1 : 0;
+}
+
+/* R2 test: returns 1 when the cursor z->c is at or beyond mark I[1]
+ * (assigned as p2 by r_mark_regions), 0 otherwise. */
+static int r_R2(struct SN_env * z) {
+    return (z->c >= z->I[1]) ? 1 : 0;
+}
+
+static int r_Step_1a(struct SN_env * z) { /* Step 1a: backward match against table a_1.
+     * Result 1 ("sses"): the slice is replaced by "ss".
+     * Result 2 ("ied", "ies"): replaced by "ie" when stepping back one
+     * symbol from the match lands exactly on the backward limit, otherwise
+     * by "i".
+     * Result 3 ("s"): steps back one symbol, then scans backwards until
+     * in_grouping_b(g_v) succeeds, and deletes the slice; returns 0 if the
+     * limit is hit first.
+     * Entries with result -1 ("ss", "us") match without changing anything.
+     * Returns 0 when nothing in the table matches, otherwise 1.
+     */
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_1b(struct SN_env * z) { /* Step 1b: backward match against table a_3.
+     * Result 1 ("eed", "eedly"): requires r_R1, then the slice becomes "ee".
+     * Result 2 ("ed", "ing", "edly", "ingly"): requires that a backward scan
+     * finds a position where in_grouping_b(g_v) succeeds, deletes the slice,
+     * then matches what is left against table a_2:
+     *   result 1 ("at", "bl", "iz"): inserts "e" at the cursor;
+     *   result 2 (the doubled letters in a_2): deletes one symbol;
+     *   result 3 (the empty entry): inserts "e" only when the cursor is at
+     *   mark I[0] and r_shortv succeeds.
+     * Returns 0 as soon as any required match or test fails, otherwise 1.
+     */
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_1c(struct SN_env * z) { /* Step 1c: replaces a 'y' or 'Y' with 'i'.
+     * The symbol before the cursor must match 'y' or 'Y' (eq_s_b),
+     * out_grouping_b(g_v) must then succeed, and the cursor must not have
+     * reached the backward limit z->lb.  Returns 1 after the replacement,
+     * 0 when any of these conditions fails.
+     */
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+static int r_Step_2(struct SN_env * z) { /* Step 2: backward match against table a_4, which must also pass r_R1.
+     * The matched slice is replaced by the string chosen by the table's
+     * result code (s_13 .. s_28).  Two results carry an extra condition:
+     * 13 ("ogi") needs eq_s_b() to match 'l' before it becomes "og", and
+     * 16 ("li") is deleted only when in_grouping_b(g_valid_LI) succeeds.
+     * Returns 0 when there is no match or a condition fails, otherwise 1.
+     */
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_3(struct SN_env * z) { /* Step 3: backward match against table a_5, which must also pass r_R1.
+     * Results 1-4 replace the slice with s_29 .. s_32 ("tion", "ate", "al",
+     * "ic"); result 5 ("ful", "ness") deletes it; result 6 ("ative")
+     * deletes it only when r_R2 also succeeds.
+     * Returns 0 when there is no match or a test fails, otherwise 1.
+     */
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_4(struct SN_env * z) { /* Step 4: backward match against table a_6, which must also pass r_R2.
+     * Result 1 deletes the slice.  Result 2 ("ion") deletes it only when
+     * eq_s_b() matches 's' or 't' before it.
+     * Returns 0 when there is no match or a test fails, otherwise 1.
+     */
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_5(struct SN_env * z) { /* Step 5: backward match against table a_7 ("e" or "l").
+     * Result 1 ("e"): deleted when r_R2 succeeds, or when r_R1 succeeds and
+     * r_shortv fails.
+     * Result 2 ("l"): deleted when r_R2 succeeds and eq_s_b() matches
+     * another 'l' before it.
+     * Returns 0 when there is no match or the conditions fail, otherwise 1.
+     */
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/* Exception list 2: returns 1 when find_among_b() matches an entry of table
+ * a_8 and the cursor has then reached the backward limit z->lb; returns 0
+ * otherwise.  ket and bra are set around the match as a side effect. */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (find_among_b(z, a_8, 8)) { /* substring, line 147 */
+        z->bra = z->c; /* ], line 147 */
+        return (z->c > z->lb) ? 0 : 1; /* atlimit, line 147 */
+    }
+    return 0;
+}
+
+static int r_exception1(struct SN_env * z) { /* Exception list 1: forward match against table a_9.
+     * The match must end exactly at the limit z->l, otherwise 0 is returned.
+     * Results 1-11 replace the slice with the fixed strings s_36 .. s_46;
+     * entries with result -1 match but leave the text unchanged.
+     * Returns 0 when nothing matches, otherwise 1.
+     */
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+
+static int r_postlude(struct SN_env * z) { /* Postlude: undoes the 'Y' marking.
+     * Returns 0 at once when the Y_found flag B[0] is clear.  Otherwise it
+     * repeatedly scans forward for a position where eq_s() matches 'Y' and
+     * rewrites that slice to 'y', stopping when the limit z->l is reached;
+     * it then returns 1.
+     */
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+extern int english_stem(struct SN_env * z) { /* Entry point of the English stemmer.
+     * 1. r_exception1 is tried first; if it succeeds nothing else runs.
+     * 2. Otherwise, if cursor + 3 lies beyond the limit z->l, the function
+     *    returns 0 without touching the word.
+     * 3. r_prelude and r_mark_regions run in forward mode.
+     * 4. Backward mode is entered (lb = c, c = l) and r_Step_1a runs.
+     * 5. Unless r_exception2 matches, Steps 1b, 1c, 2, 3, 4 and 5 run in
+     *    that order.
+     * 6. The cursor is reset to lb and r_postlude runs.
+     * The result of each step is ignored and the cursor position is
+     * restored after it.  Returns 1, except for the early return in 2.
+     */
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/* Create the environment used by english_stem(): no string variables, two
+ * integer variables (the marks I[0] and I[1]) and one boolean (B[0]). */
+extern struct SN_env * english_create_env(void)
+{
+    return SN_create_env(0, 2, 1);
+}
+
+/* Release an environment obtained from english_create_env(). */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create and destroy the environment the English stemmer works on. */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/* Entry point of the English stemmer; operates in place on environment z. */
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+/* Integer limits under the names the generated stemmers use. */
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Number of bytes reserved in front of every symbol buffer for its two int
+   header fields.  Parenthesised so that the macro expands safely inside any
+   larger expression (e.g. after a '-', '/' or '%' operator). */
+#define HEAD (2*sizeof(int))
+
+/* Header accessors.  p points at the first symbol of a buffer; its size is
+   kept in the int immediately before p and its capacity in the int before
+   that.  The n argument of SET_SIZE is parenthesised for the same reason as
+   HEAD above. */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) ((int *)(p))[-1] = (n)
+#define CAPACITY(p)    ((int *)(p))[-2]
+
+/* One entry of a search table used by find_among / find_among_b. */
+struct among {
+    /* number of chars in string */
+    int s_size;
+    /* search string */
+    symbol * s;
+    /* index to longest matching substring (negative when there is none) */
+    int substring_i;
+    /* result of the lookup */
+    int result;
+    /* optional extra condition: when non-null, the entry only matches if
+       this routine returns non-zero */
+    int (* function)(struct SN_env *);
+};
+
+/* Allocation and release of symbol buffers (header of HEAD bytes precedes
+   the returned pointer). */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+/* Character-class tests against a bitmap s covering codes min..max.  The
+   plain forms look at the character at the cursor and advance it; the _b
+   forms look at the character before the cursor and move it back. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+/* Same as the grouping tests, but against the plain code range min..max. */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+/* Literal string comparison at the cursor (forward) or ending at the cursor
+   (_b); the _v forms take a symbol buffer whose length is SIZE(p). */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+/* Table lookup of the string at (find_among) or ending at (find_among_b)
+   the cursor; returns the matching entry's result, or 0 when none matches. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+/* Buffer growth and replacement of the slice z->bra .. z->ket. */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+/* Replacement of an arbitrary range bra..ket, keeping z->bra / z->ket in
+   step with the resulting length change. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+/* Copy the current slice, or the text up to z->l, into buffer p. */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+/* Print the working string with cursor/limit/slice markers to stdout. */
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point, followed by the file-local routines it is built
+   from; each routine returns non-zero on success and 0 on failure. */
+extern int russian_stem(struct SN_env * z);
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Environment management for this stemmer (defined at the end of the file). */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Ending strings and search table a_0, looked up backwards from the cursor
+   by r_perfective_gerund.  Each table entry is
+   { length, string, substring_i, result, function }.
+   NOTE(review): the symbol codes look like KOI8-R Cyrillic - confirm. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Ending strings and search table a_1, looked up backwards from the cursor
+   by r_adjective.  Every entry yields result 1 and has no sub-entry
+   (substring_i is -1 throughout). */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Ending strings and search table a_2, looked up backwards from the cursor
+   by r_adjectival after an adjective ending has been removed.  Result 1
+   entries need a further preceding-character check in the caller; result 2
+   entries are deleted unconditionally. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* Ending strings and search table a_3, looked up backwards from the cursor
+   by r_reflexive; both entries yield result 1. */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Ending strings and search table a_4, looked up backwards from the cursor
+   by r_verb.  Result 1 entries need a further preceding-character check in
+   the caller; result 2 entries are deleted unconditionally.  Where
+   substring_i is not -1 it names the entry holding a shorter ending that is
+   a suffix of this one. */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Ending strings and search table a_5, looked up backwards from the cursor
+   by r_noun.  Every entry yields result 1; substring_i links an entry to
+   the entry holding a shorter ending that is a suffix of it. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* Ending strings and search table a_6, looked up backwards from the cursor
+   by r_derivational; both entries yield result 1. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Ending strings and search table a_7, looked up backwards from the cursor
+   by r_tidy_up, which dispatches on the result value (1, 2 or 3). */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Character-class bitmap handed to in_grouping / out_grouping together with
+   the code range 192..220: bit (code - 192) is set for each member.
+   NOTE(review): presumably the vowel class of the stemmer - confirm. */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* Single-symbol literals compared with eq_s_b by the routines below:
+   s_0/s_1 in r_perfective_gerund, s_2/s_3 in r_adjectival, s_4/s_5 in
+   r_verb, s_6..s_8 in r_tidy_up and s_9 in russian_stem. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the two region marks z->I[0] and z->I[1].
+   Both start out as z->l.  Scanning forward from the cursor, I[0] becomes
+   the position just past the first character that belongs to g_v.  I[1]
+   becomes the position reached after additionally going past a character
+   outside g_v, a character in g_v and another character outside g_v.  If
+   the end of the text is hit first, the marks not yet set keep the value
+   z->l.  The cursor is restored before returning; the result is always 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 100 */
+        /* each loop below steps one character at a time until the grouping
+           test succeeds (which itself consumes the matching character) */
+        while(1) { /* gopast, line 101 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+            break;
+        lab1:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[0] = z->c; /* setmark pV, line 101 */
+        while(1) { /* gopast, line 101 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+            break;
+        lab2:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+            break;
+        lab3:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+            break;
+        lab4:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 102 */
+    lab0:
+        /* put the cursor back where it was on entry */
+        z->c = c;
+    }
+    return 1;
+}
+
+/* Succeeds (returns 1) when the cursor is at or beyond the mark z->I[1],
+   otherwise returns 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/* Remove an ending listed in table a_0 that ends at the cursor.
+   Endings with result 1 are only removed when the symbol before them is
+   s_0 or s_1; endings with result 2 are removed unconditionally.
+   Returns 1 on success and 0 when nothing applicable was found. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_0, 9);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        int saved = z->l - z->c;
+        if (!eq_s_b(z, 1, s_0)) {
+            /* first alternative failed: rewind and try the second one */
+            z->c = z->l - saved;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z);
+    } else if (among_var == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Remove an ending listed in table a_1 that ends at the cursor.
+   Returns 1 when an ending was found, 0 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_1, 26);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/* Remove an adjective ending (r_adjective) and then, optionally, a further
+   ending from table a_2.  Fails (returns 0) only when r_adjective fails;
+   the second removal is a "try" whose failure still yields 1.
+   For a_2 entries with result 1 the ending is removed only if it is
+   preceded by s_2 or s_3; entries with result 2 are removed directly. */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m; goto lab0; }
+            case 1:
+                /* the m declared here shadows the outer m for the whole
+                   of this inner block, including the restore on failure */
+                {   int m = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m;
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Remove an ending listed in table a_3 that ends at the cursor.
+   Returns 1 when an ending was found, 0 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_3, 2);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/* Remove an ending listed in table a_4 that ends at the cursor.
+   Endings with result 1 are only removed when the symbol before them is
+   s_4 or s_5; endings with result 2 are removed unconditionally.
+   Returns 1 on success and 0 when nothing applicable was found. */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_4, 46);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        int saved = z->l - z->c;
+        if (!eq_s_b(z, 1, s_4)) {
+            /* first alternative failed: rewind and try the second one */
+            z->c = z->l - saved;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z);
+    } else if (among_var == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Remove an ending listed in table a_5 that ends at the cursor.
+   Returns 1 when an ending was found, 0 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_5, 36);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/* Remove an ending listed in table a_6 that ends at the cursor, provided
+   the ending starts at or beyond the mark tested by r_R2.
+   Returns 1 when an ending was removed, 0 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_6, 2);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    /* the region test runs with the cursor at the start of the ending */
+    if (!r_R2(z)) return 0;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/* Final clean-up driven by table a_7.
+   result 1: delete the ending, then delete one s_6 symbol provided it is
+             itself preceded by s_7;
+   result 2: delete the ending provided it is preceded by s_8;
+   result 3: delete the ending.
+   Returns 0 as soon as a required test fails (deletions already made are
+   kept), 1 otherwise. */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_7, 4);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        slice_del(z);
+        z->ket = z->c;
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c;
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z);
+    } else if (among_var == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z);
+    } else if (among_var == 3) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Stem the word held in environment z, in place.
+   Steps, as performed below:
+     1. compute the region marks (r_mark_regions);
+     2. switch to backward processing and restrict it to the text after
+        mark I[0] by temporarily raising z->lb (returns 0 if the cursor is
+        already before I[0]);
+     3. remove a perfective-gerund ending, or else optionally a reflexive
+        ending followed by an adjectival, verb or noun ending;
+     4. optionally remove a trailing s_9 symbol;
+     5. apply r_derivational and r_tidy_up;
+     6. restore z->lb and leave the cursor at it.
+   Returns 1 on completion. */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        /* m3 remembers the old backward limit; it is put back at the end */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                /* first of adjectival / verb / noun that succeeds wins */
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3;
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Allocate the environment used by the Russian stemmer.  The sizing
+   arguments (0, 2, 0) are handed to SN_create_env unchanged; this file
+   uses the two integer slots z->I[0] and z->I[1]. */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Release an environment obtained from russian_create_env. */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create and destroy the environment the Russian stemmer works on. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Entry point of the Russian stemmer; operates in place on environment z. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+#define unless(C) if(!(C))
+
+#define CREATE_SIZE 1
+
+/* Allocate a new symbol buffer with capacity and size both CREATE_SIZE.
+   The returned pointer addresses the first symbol; the two int header
+   fields live in the HEAD bytes in front of it.
+   Returns NULL when the allocation fails (previously the unchecked malloc
+   result was offset by HEAD and written through). */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL) return NULL;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Free a symbol buffer obtained from create_s or increase_size.
+   p points at the first symbol, so the allocation starts HEAD bytes
+   earlier.  A NULL p is ignored, matching free(); without the guard the
+   pointer arithmetic on NULL would be undefined behaviour. */
+extern void lose_s(symbol * p)
+{   if (p == NULL) return;
+    free((char *) p - HEAD);
+}
+
+/* Forward test: if the character at the cursor lies in min..max and its
+   bit is set in bitmap s, step the cursor over it and return 1.
+   Returns 0 (cursor unchanged) otherwise, or when the cursor is at z->l. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int code;
+    int bit;
+
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code > max || code < min) return 0;
+    bit = code - min;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: if the character before the cursor lies in min..max and
+   its bit is set in bitmap s, step the cursor back over it and return 1.
+   Returns 0 (cursor unchanged) otherwise, or when the cursor is at z->lb. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int code;
+    int bit;
+
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code > max || code < min) return 0;
+    bit = code - min;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: if the character at the cursor is NOT a member of the
+   group (outside min..max, or its bit in s is clear), step the cursor over
+   it and return 1.  Returns 0 (cursor unchanged) when it is a member, or
+   when the cursor is at z->l. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int code;
+
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code <= max && code >= min) {
+        int bit = code - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward test: if the character before the cursor is NOT a member of the
+   group (outside min..max, or its bit in s is clear), step the cursor back
+   over it and return 1.  Returns 0 (cursor unchanged) when it is a member,
+   or when the cursor is at z->lb. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int code;
+
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code <= max && code >= min) {
+        int bit = code - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward test: step over the character at the cursor and return 1 when
+   its code lies in min..max; return 0 otherwise or at z->l. */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int code;
+
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (!(min <= code && code <= max)) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: step back over the character before the cursor and
+   return 1 when its code lies in min..max; return 0 otherwise or at
+   z->lb. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int code;
+
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (!(min <= code && code <= max)) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: step over the character at the cursor and return 1 when
+   its code lies outside min..max; return 0 otherwise or at z->l. */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int code;
+
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (min <= code && code <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: step back over the character before the cursor and
+   return 1 when its code lies outside min..max; return 0 otherwise or at
+   z->lb. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int code;
+
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (min <= code && code <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward literal match: if the s_size symbols at s equal the text starting
+   at the cursor, advance the cursor past them and return 1; else return 0
+   and leave the cursor alone. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->l - z->c < s_size) return 0;     /* not enough text left */
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward literal match: if the s_size symbols at s equal the text ending
+   at the cursor, move the cursor back over them and return 1; else return
+   0 and leave the cursor alone. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->c - z->lb < s_size) return 0;    /* not enough text before cursor */
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward literal match against symbol buffer p, whose length is SIZE(p). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s(z, len, p);
+}
+
+/* Backward literal match against symbol buffer p, whose length is SIZE(p). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s_b(z, len, p);
+}
+
+/* Look up the text starting at the cursor in table v (v_size entries).
+   A binary search narrows [i, j) by comparing table strings with the input;
+   NOTE(review): this presumes the table strings are in ascending order -
+   confirm against the table generator.  Afterwards the substring_i chain
+   is followed from entry i until an entry is found whose whole string was
+   matched.  For that entry the cursor is placed just past the match; if
+   the entry has no function its result is returned, otherwise the function
+   must return non-zero for the result to be returned.
+   Returns 0 when the chain is exhausted without a match. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    /* number of leading symbols already known to match at bounds i and j */
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    /* walk from the candidate entry through its shorter substrings */
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* reset the cursor after the call */
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/* Backward counterpart of find_among: looks up the text ENDING at the
+   cursor in table v (v_size entries), comparing each table string from its
+   last symbol towards its first and never reading before z->lb.  On a
+   match the cursor is placed at the start of the matched text and the
+   entry's result is returned (subject to the entry's function, if any,
+   returning non-zero); returns 0 when nothing matches. */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    /* q addresses the last symbol before the cursor */
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    /* number of trailing symbols already known to match at bounds i and j */
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            /* one extra round so that entry 0 gets inspected as well */
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    /* walk from the candidate entry through its shorter substrings */
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* reset the cursor after the call */
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Replace buffer p by a larger one able to hold n symbols plus 20 spare.
+   The first CAPACITY(p) symbols and the size field are carried over, p is
+   freed, and the new buffer is returned.  Callers are expected to pass an
+   n larger than CAPACITY(p).
+   None of the callers in this file checks the result, so an allocation
+   failure is reported on stderr and terminates the process (the same
+   policy slice_check uses) instead of writing through an invalid pointer.
+   The size field is now copied too, so the new header is never left
+   uninitialised. */
+extern symbol * increase_size(symbol * p, int n)
+{   int new_size = n + 20;
+    symbol * q;
+    void * mem = malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    if (mem == NULL)
+    {   fprintf(stderr, "out of memory while growing a symbol buffer\n");
+        exit(1);
+    }
+    q = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(q) = new_size;
+    SET_SIZE(q, SIZE(p));
+    memmove(q, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return q;
+}
+
+/* to replace symbols between c_bra and c_ket in z->p by the
+   s_size symbols at s
+*/
+
+/* Replace the symbols z->p[c_bra .. c_ket) by the s_size symbols at s.
+   When the length changes, the buffer is grown if required, the tail is
+   shifted, and SIZE(z->p), z->l and the cursor are updated: a cursor at or
+   after c_ket moves with the tail, one strictly inside the replaced range
+   is pulled back to c_bra.
+   Returns the change in length (new minus old). */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{
+    int delta = s_size - (c_ket - c_bra);
+    int old_len = SIZE(z->p);
+
+    if (delta != 0) {
+        int new_len = delta + old_len;
+
+        if (new_len > CAPACITY(z->p)) z->p = increase_size(z->p, new_len);
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+
+        if (z->c >= c_ket) {
+            z->c += delta;
+        } else if (z->c > c_bra) {
+            z->c = c_bra;
+        }
+    }
+    if (s_size != 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/* Sanity check before a slice operation: requires
+   0 <= bra <= ket <= l <= SIZE(p).  On violation a message and a dump of
+   the environment are printed and the process exits with status 1. */
+static void slice_check(struct SN_env * z)
+{
+    int ok = 0 <= z->bra &&
+             z->bra <= z->ket &&
+             z->ket <= z->l &&
+             z->l <= SIZE(z->p);
+
+    if (ok) return;
+
+    fprintf(stderr, "faulty slice operation:\n");
+    debug(z, -1, 0);
+    exit(1);
+}
+
+/* Replace the current slice z->bra .. z->ket by the s_size symbols at s,
+   after validating the slice bounds. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the contents of symbol buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    slice_from_s(z, len, p);
+}
+
+/* Delete the current slice, i.e. replace it by an empty string. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, (symbol *) 0);
+}
+
+/* Replace z->p[bra .. ket) by the s_size symbols at s and shift z->bra and
+   z->ket by the resulting length change when they lie at or after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int shift = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += shift;
+    if (z->ket >= bra) z->ket += shift;
+}
+
+/* Same as insert_s, taking the replacement text from symbol buffer p. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    int shift = replace_s(z, bra, ket, SIZE(p), p);
+
+    if (z->bra >= bra) z->bra += shift;
+    if (z->ket >= bra) z->ket += shift;
+}
+
+/* Copy the current slice z->bra .. z->ket into buffer p, growing p when it
+   is too small.  Returns the (possibly reallocated) buffer with its size
+   set to the slice length. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int len;
+
+    slice_check(z);
+    len = z->ket - z->bra;
+    if (len > CAPACITY(p)) p = increase_size(p, len);
+    memmove(p, z->p + z->bra, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Copy the first z->l symbols of the working string into buffer p, growing
+   p when it is too small.  Returns the (possibly reallocated) buffer with
+   its size set to z->l. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int count = z->l;
+
+    if (count > CAPACITY(p)) p = increase_size(p, count);
+    memmove(p, z->p, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/* Dump the working string to stdout with position markers:
+   '{' at z->lb, '[' at z->bra, '|' at the cursor, ']' at z->ket and '}' at
+   z->l; zero symbols are shown as '#'.  When number is non-negative the
+   dump is prefixed with number, line_count and the buffer size. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{
+    int pos;
+    int limit = SIZE(z->p);
+
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+
+    /* pos runs one past the last symbol so that markers sitting at the
+       very end are printed too */
+    for (pos = 0; pos <= limit; pos++) {
+        if (z->lb == pos) printf("{");
+        if (z->bra == pos) printf("[");
+        if (z->c == pos) printf("|");
+        if (z->ket == pos) printf("]");
+        if (z->l == pos) printf("}");
+        if (pos < limit) {
+            int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+<i <b> wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+<i <b> wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+<i <b> wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case the NUL-terminated string str in place, one byte at a time,
+ * passing each byte to tolower() as an unsigned char.  Returns str.
+ */
+char*
+lowerstr(char *str) {
+   unsigned char *cur;
+
+   for(cur=(unsigned char*)str; *cur != '\0'; cur++)
+       *cur = tolower(*cur);
+   return str;
+}
+
+/*
+ * Release everything a StopList owns: each of the s->len strings, then the
+ * pointer array, and finally zero the whole structure (which also clears
+ * s->wordop).
+ */
+void
+freestoplist(StopList *s) {
+   if ( s->stop ) {
+       int i;
+
+       /*
+        * Walk by count only: readstoplist does not NULL-terminate the
+        * array, so the element after the last one must not be read.
+        */
+       for(i=0; i < s->len; i++)
+           free(s->stop[i]);
+
+       /* the array itself is released exactly once, after its elements */
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Load a stop-word list from the file named by the text value `in`.
+ *
+ * Each non-empty line of the file becomes one strdup()ed entry; if
+ * s->wordop is set it is applied to every entry.  On return s->stop points
+ * to the malloc()ed array and s->len is the number of entries.  When `in`
+ * is NULL or empty the list is left empty (s->stop = NULL, s->len = 0).
+ *
+ * Raises ERROR if the file cannot be opened or memory runs out; in the
+ * latter case everything allocated so far is released first.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+       bool    nomem=false;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t  buflen=strlen(buf);
+
+           /*
+            * Strip the newline only when fgets stored one: the last line
+            * of a file (or an over-long line) may come without it, and
+            * must not lose its final character.
+            */
+           if ( buflen > 0 && buf[buflen-1] == '\n' )
+               buf[--buflen] = '\0';
+           if ( buflen == 0 ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   nomem=true;
+                   break;
+               }
+               stop=tmp;
+           }
+
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               nomem=true;
+               break;
+           }
+           if ( s->wordop )
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++;
+       }
+       fclose(hin);
+
+       if ( nomem ) {
+           /*
+            * The partial array is still local here, so release it by hand
+            * (stop[0 .. s->len-1] are valid), then let freestoplist zero
+            * the structure as before.
+            */
+           while( s->len > 0 )
+               free(stop[--(s->len)]);
+           if ( stop )
+               free(stop);
+           s->stop=NULL;
+           freestoplist(s);
+           elog(ERROR,"Not enough memory");
+       }
+       pfree(filename);
+   }
+   s->stop=stop;
+}
+
+/*
+ * qsort/bsearch comparator for arrays of char*: a and b point at the
+ * array elements, i.e. at pointers to the strings, which are compared
+ * with strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   char *lhs = *(char**)a;
+   char *rhs = *(char**)b;
+
+   return strcmp( lhs, rhs );
+}
+
+/*
+ * Sort the entries of s->stop with comparestr (strcmp order).  Does
+ * nothing for a list without an array or without entries.
+ */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true if key is in the stop list.  When s->wordop is set it is
+ * applied to key first and the result is what gets looked up.  The lookup
+ * is a bsearch() with comparestr, so s->stop has to be in the order that
+ * sortstoplist produces.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop )
+       key=(*(s->wordop))(key);
+
+   /* an empty list matches nothing */
+   if ( !s->stop || s->len<=0 )
+       return false;
+
+   return ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) != NULL ) ? true : false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the configuration whose pg_ts_cfg row has the given oid.
+ *
+ * Two SPI queries are run (their plans are prepared once and kept in the
+ * file-level plan_getcfg / plan_getmap variables): the first fetches the
+ * parser name, the second the token id -> dictionary-name-array map.
+ * After SPI_finish() the saved names are resolved with name2id_prs() and
+ * name2id_dict(), so cfg->prs_id and every cfg->map[i].dict_id[j] end up
+ * holding ids instead of names.
+ *
+ * All failures are reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer(
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
+       );
+       /* copy outside the SPI context so the name outlives SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+
+       cfg->id=id;
+   } else
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second query: $1 is the parser name (text), $2 stays the cfg oid */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       /* isnull is overwritten here and afterwards describes dict_name */
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /*
+        * Rows arrive ordered by tokid descending, so the first row carries
+        * the largest token id and fixes the size of the map.
+        */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* for now dict_id[] holds copies of the dictionary names */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       }
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a )
+           pfree(a);
+   }
+
+   SPI_finish();
+
+   /* resolve the saved names to ids now that SPI is closed */
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+typedef struct {
+   TSCfgInfo   *last_cfg;
+   int     len;
+   int     reallen;
+   TSCfgInfo   *list;
+   SNMap       name2id_map;
+} CFGList;
+
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name -> id map, every cached
+ * TSCfgInfo together with its map and dict_id arrays, and the list itself.
+ * CList is zeroed afterwards.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort/bsearch comparator ordering TSCfgInfo entries by id.  The ids are
+ * compared explicitly rather than subtracted: Oid is unsigned, so a
+ * difference converted to int has the wrong sign for ids far apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached TSCfgInfo for the given configuration oid, loading it
+ * with init_cfg() on first use.  CList.list is kept sorted by id so that
+ * known entries are found with bsearch(); CList.last_cfg remembers the
+ * most recent answer.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo *newcfg;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance */
+
+   /*
+    * Do not publish the new slot through last_cfg until init_cfg() has
+    * succeeded: init_cfg stores the id early and may then raise an error,
+    * which would leave a half-initialized entry that the shortcut above
+    * returns on the next call.
+    */
+   CList.last_cfg = NULL;
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp )
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+   newcfg=&(CList.list[CList.len]);
+   init_cfg(id, newcfg);
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return findcfg(id); /* qsort changed order!! */
+}
+
+
+/*
+ * Translate a configuration name (pg_ts_cfg.ts_name) into the oid of its
+ * row.  Answers are remembered in CList.name2id_map, so the SPI query runs
+ * only when the map returns zero for the name.  Raises ERROR when no row
+ * matches or the oid comes back NULL.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid     argtypes[1]={ TEXTOID };
+   Datum   values[1]={ PointerGetDatum(name) };
+   bool    isnull;
+   int     rc;
+   Oid     cfgid;
+
+   /* answered by the cache? */
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid )
+       return cfgid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed == 0 )
+       elog(ERROR, "No tsearch config");
+
+   cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull )
+       elog(ERROR, "Null id for tsearch config");
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Parse buf[0..buflen) with the parser of cfg and append the normalized
+ * lexemes to prs->words.
+ *
+ * For every token the dictionaries mapped to its type are tried in order;
+ * the first one that returns a non-NULL result decides the outcome.  Each
+ * string it returns becomes one WORD, all sharing the same position; the
+ * strings themselves are kept (only the array holding them is freed).
+ * Tokens whose type is >= cfg->len are skipped.  Raises ERROR for a token
+ * of MAXSTRLEN bytes or more.
+ */
+void
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser hands back token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           /* lenlemm is an int, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token to prs->words: the array is doubled until there is
+ * room, the new entry is zeroed, and it receives the token type, its
+ * length and a palloc()ed copy of the buflen bytes at buf (the copy is
+ * not NUL-terminated).
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* address taken after any repalloc, so it is valid */
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;
+}
+
+/*
+ * Link the most recently added word (prs->words[prs->curwords-1]) to the
+ * query items whose operand equals the lexeme buf[0..buflen).
+ *
+ * The first matching VAL item is stored in the word itself.  For every
+ * further match a copy of the word is appended, pointing at that item and
+ * flagged `repeated`.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* make room for the worst case of one extra copy per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* take the address only now: repalloc above may have moved prs->words */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) {
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else
+               word->item=item;
+       }
+       item++;
+   }
+}
+
+/*
+ * Parse buf[0..buflen) with the parser of cfg for headline generation.
+ *
+ * Every token is appended to prs->words verbatim with hladdword().  If
+ * its type has dictionaries mapped, the first dictionary returning a
+ * non-NULL result supplies the normalized forms, each of which is matched
+ * against the query with hlfinditem() and then freed.  Raises ERROR for a
+ * token of MAXSTRLEN bytes or more.
+ */
+void
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser hands back token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len )
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           /* lenlemm is an int, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from prs->words.
+ *
+ * Only words flagged `in` and neither `skip` nor `repeated` are emitted:
+ * a `replace` word becomes a single space, any other word is copied and,
+ * when `selected`, wrapped in prs->startsel / prs->stopsel.  The word
+ * buffers are pfree()d along the way (except those of `repeated` entries,
+ * which are not freed here).  Returns a palloc()ed text value.
+ */
+text*
+genhl(HLPRSTEXT * prs) {
+   int     alloclen=128;
+   text    *out=(text*)palloc( alloclen );
+   char    *cur=((char*)out) + VARHDRSZ;
+   int     i;
+
+   for(i=0; i < prs->curwords; i++) {
+       HLWORD  *wrd=&(prs->words[i]);
+
+       /* grow until the word plus both selection markers is sure to fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (cur - ((char*)out)) >= alloclen ) {
+           int used = cur - ((char*)out);
+
+           alloclen *= 2;
+           out = (text *) repalloc(out, alloclen);
+           cur = ((char*)out) + used;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace ) {
+               *cur++ = ' ';
+           } else if ( wrd->selected ) {
+               memcpy(cur,prs->startsel,prs->startsellen);
+               cur+=prs->startsellen;
+               memcpy(cur,wrd->word,wrd->len);
+               cur+=wrd->len;
+               memcpy(cur,prs->stopsel,prs->stopsellen);
+               cur+=prs->stopsellen;
+           } else {
+               memcpy(cur,wrd->word,wrd->len);
+               cur+=wrd->len;
+           }
+       }
+
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+   }
+
+   VARATT_SIZEP(out)=cur - ((char*)out);
+   return out;
+}
+
+/*
+ * Return the id of the current configuration.  If none has been chosen
+ * yet (current_cfg_id is 0) it is looked up in pg_ts_cfg by the name of
+ * the current LC_CTYPE locale and remembered; ERROR is raised when no row
+ * has that locale.
+ */
+int  
+get_currcfg(void) {
+   Oid     argtypes[1]={ TEXTOID };
+   Datum   values[1];
+   bool    isnull;
+   int     rc;
+   const char *curlocale;
+
+   /* already decided */
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( plan_getcfg_bylocale == NULL ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( plan_getcfg_bylocale == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* a NULL second argument only queries the locale name */
+   curlocale = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)curlocale) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed == 0 )
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current
+ * one.  findcfg() is called first, so the configuration is loaded into
+ * the cache before the id is recorded.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current
+ * one.  The name is resolved with name2id_cfg() and the work is handed to
+ * set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text    *name=PG_GETARG_TEXT_P(0);
+   Oid     cfgid=name2id_cfg(name);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum(cfgid) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* SQL-callable: return the id of the current configuration, as reported
+ * by get_currcfg(). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   int cfgid=get_currcfg();
+
+   PG_RETURN_OID( cfgid );
+}
+
+/*
+ * SQL-callable: report "TSearch cache cleaned" at NOTICE level.
+ * NOTE(review): no reset_* function is called here directly; presumably
+ * ts_error() itself drops the caches before reporting -- confirm against
+ * its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/*
+ * Dictionaries mapped to one token type.  len is the number of entries in
+ * dict_id.  init_cfg() first stores pointers to dictionary names there and
+ * replaces them with dictionary ids (ObjectIdGetDatum) before returning.
+ */
+typedef struct {
+   int len;
+   Datum   *dict_id;
+} ListDictionary;
+
+/*
+ * One cached configuration, filled by init_cfg().  id is the oid of the
+ * pg_ts_cfg row, prs_id the id of its parser, and map an array of len
+ * entries indexed by token type id.
+ */
+typedef struct {
+   Oid id;
+   Oid prs_id;
+   int len;
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/*
+ * One normalized lexeme produced by parsetext_v2(): word points at the
+ * string and len is its strlen().  parsetext_v2() stores the (limited)
+ * position in pos.pos and sets alen to 0.
+ * NOTE(review): pos.apos / alen look like a position array and its
+ * length used by later processing -- confirm with the code that fills
+ * them.
+ */
+typedef struct {
+        uint16          len;
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        char       *word;
+   uint32  alen;
+}       WORD;
+   
+/*
+ * Growing array of WORDs filled by parsetext_v2(): lenwords is the
+ * allocated size (doubled when full), curwords the number in use, and pos
+ * a counter incremented once per token that a dictionary recognized.
+ */
+typedef struct {
+        WORD       *words;
+        int4            lenwords;
+        int4            curwords;
+   int4        pos;
+}       PRSTEXT;
+
+/*
+ * One token of a text being prepared for a headline.  hladdword() sets
+ * type, len and word (a copy of the token, not NUL-terminated);
+ * hlfinditem() sets item to a matching query item and marks extra copies
+ * with `repeated`.  genhl() emits a word only if `in` is set and `skip`
+ * and `repeated` are not; `replace` turns it into a single space and
+ * `selected` wraps it in the start/stop selection strings.
+ */
+typedef struct {
+        uint16    len;
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   uint8   type;
+        char      *word;
+   ITEM      *item;
+}       HLWORD;
+   
+/*
+ * Growing array of HLWORDs plus the strings genhl() puts around selected
+ * words: startsel/stopsel with their byte lengths startsellen/stopsellen.
+ * lenwords is the allocated size of words, curwords the number in use.
+ */
+typedef struct {
+        HLWORD       *words;
+        int4            lenwords;
+        int4            curwords;
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+
+/*
+ * Input function of the tsstat type.  The textual argument is never read:
+ * the result is always an empty accumulator made of the bare header
+ * (len = STATHDRSIZE, size = 0).
+ */
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty = palloc(STATHDRSIZE);
+
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+/*
+ * Output function of the tsstat type: not implemented, always raises
+ * ERROR "Unimplemented".  The PG_RETURN_NULL() after elog() only keeps the
+ * function well-formed for the compiler.
+ */
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * Grow (or create) an array of WordEntry pointers.
+ *
+ * in   - current array, or NULL if nothing has been allocated yet
+ * len  - in/out capacity in elements; set to 8 on first allocation,
+ *        doubled on every later call
+ *
+ * Returns the (possibly moved) array.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       /* already allocated: double the capacity */
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*)* (*len) );
+   }
+
+   /* first use: start with room for 8 pointers */
+   *len=8;
+   return palloc( sizeof(WordEntry*)* (*len) );
+}
+
+/*
+ * Ordering used for the StatEntry array: compare the word described by
+ * stat entry a (text stored in stat) with tsvector entry b (text stored in
+ * txt).  Shorter words sort first; words of equal length are ordered by
+ * strncmp() over their bytes.  Returns <0, 0 or >0.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(
+       STATSTRPTR(stat) + a->pos,
+       STRPTR(txt) + b->pos,
+       a->len
+   );
+}
+
+/*
+ * Build a new tsstat holding every entry of stat plus len new words.
+ *
+ * stat   - existing accumulator (not modified, not freed)
+ * txt    - tsvector the new words come from
+ * entry  - array of len pointers into txt's WordEntry array; the caller
+ *          (ts_accum) collects them in ascending compareStatWord() order and
+ *          only passes words that are absent from stat
+ * len    - number of elements in entry
+ *
+ * The old string area is copied first and the text of the new words is
+ * appended after it, so the pos offsets of copied entries stay valid.
+ * Every new entry starts with ndoc = 1 and nentry = number of positions of
+ * the word in txt (1 if it carries no position data).
+ *
+ * Returns the freshly palloc'ed accumulator.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* bytes of word text contributed by the new entries */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings keep their offsets; new strings are appended at curptr */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /*
+        * Single new word: binary-search its insertion point (first old
+        * entry that does not sort before it), then copy the old entries
+        * around the new one.  The string area of stat starts right after
+        * its last StatEntry, hence the StopHigh initialisation.
+        */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge the two sorted sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /*
+        * At most one of the two tails is non-empty: either old entries are
+        * left (copied here, and the loop below does nothing) or new words
+        * are left (this memcpy copies zero bytes).
+        */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+/*
+ * Fold one tsvector (argument 1) into a tsstat accumulator (argument 0).
+ *
+ * Words already present in the accumulator are updated in place
+ * (ndoc += 1, nentry += number of positions, counting a word without
+ * position data as 1).  Words not yet present are collected in newentry[]
+ * and added by formstat(), which returns a newly allocated accumulator.
+ *
+ * Returns the original stat pointer when no new word was found, otherwise
+ * the new accumulator; the old one is not freed here (callers such as
+ * ts_stat_sql free it themselves).
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;   /* capacity / used length of newentry[] */
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both arrays in parallel when the accumulator is
+    * less than 100 times larger than the tsvector, otherwise binary-search
+    * the accumulator for each word of the tsvector.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word sorts before the current stat entry: it is new */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* accumulator exhausted: every remaining word is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* the loop ended without the break above only when the range is empty */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions ts_accum_finish() and
+ * ts_stat(), kept in FuncCallContext.user_fctx.
+ */
+typedef struct {
+   uint32  cur;        /* index of the next StatEntry to return */
+   tsstat  *stat;      /* private copy of the accumulator being listed */
+} StatStorage;
+
+/*
+ * First-call initialisation shared by the set-returning functions.
+ *
+ * Everything is allocated in funcctx->multi_call_memory_ctx: a StatStorage
+ * with cur = 0 and a byte-for-byte copy of stat (stat->len bytes), stored
+ * in funcctx->user_fctx.  The result row layout is taken from the relation
+ * named "statinfo"; its slot and attribute input metadata are stored in
+ * funcctx for ts_process_call().  The previous memory context is restored
+ * before returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   StatStorage     *st;
+   
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   st=palloc( sizeof(StatStorage) );
+   st->cur=0;
+   st->stat=palloc( stat->len );
+   memcpy(st->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+
+/*
+ * Produce the next result row of a statistics listing.
+ *
+ * Reads the StatStorage kept in funcctx->user_fctx.  While entries remain,
+ * builds a (word, ndoc, nentry) tuple from C strings for the current entry,
+ * advances the cursor and returns the tuple as a Datum.  Once every entry
+ * has been returned, frees the stored accumulator and the StatStorage and
+ * returns (Datum)0 to signal the end of the set.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *state=(StatStorage*)funcctx->user_fctx;
+   StatEntry   *entry;
+   HeapTuple   tuple;
+   Datum       result;
+   char        *values[3];
+   char        ndoc[16];
+   char        nentry[16];
+
+   /* nothing left: release the per-query state */
+   if ( state->cur >= state->stat->size ) {
+       pfree(state->stat);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry=STATPTR(state->stat) + state->cur;
+
+   /* column 1: the word, copied out as a NUL-terminated string */
+   values[0]=palloc( entry->len+1 );
+   memcpy( values[0], STATSTRPTR(state->stat)+entry->pos, entry->len);
+   values[0][entry->len]='\0';
+
+   /* columns 2 and 3: the counters in decimal text form */
+   sprintf(ndoc,"%d",entry->ndoc);
+   values[1]=ndoc;
+   sprintf(nentry,"%d",entry->nentry);
+   values[2]=nentry;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[0]);
+   state->cur++;
+
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function that lists the contents of a tsstat accumulator
+ * (argument 0), one row per call.  The first call copies the accumulator
+ * via ts_setup_firstcall(); every call then returns the next row from
+ * ts_process_call(), or finishes the set when it yields (Datum)0.
+ */
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* oid of the tsvector type, looked up once by get_ti_Oid() and then reused */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the oid of the type named 'tsvector' in pg_type through SPI and
+ * store it in tiOid.  Must be called inside an SPI connection.
+ *
+ * Raises ERROR if the query fails, if it returns no row, or if the oid
+ * found is InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is a row count, so a "< 0" test can never fire; test for
+    * an empty result instead, otherwise vals[0] below is read without any
+    * row being present.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL query given in txt and accumulate word statistics over its
+ * result.
+ *
+ * The query must return exactly one column of type tsvector; otherwise an
+ * ERROR is raised.  Rows are pulled through an SPI cursor in batches of 100
+ * and each non-null value is folded into the accumulator with ts_accum().
+ * Must be called inside an SPI connection.
+ *
+ * Returns the final accumulator (an empty one if the query yields no
+ * non-null value).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from an empty accumulator */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       /* fold every row of the current batch into the accumulator */
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum returns a new object only when words were added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function behind SQL stat(text): argument 0 is the text of
+ * a query.  On the first call the query is executed inside an SPI
+ * connection by ts_stat_sql() and the resulting accumulator is copied into
+ * the multi-call context by ts_setup_firstcall() before SPI_finish().
+ * Every call then returns the next row from ts_process_call(), or finishes
+ * the set when it yields (Datum)0.
+ */
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/* Statistics for one distinct word stored in a tsstat. */
+typedef struct {
+   uint32  len;        /* length of the word in bytes */
+   uint32  pos;        /* offset of the word from the start of the string area (STATSTRPTR) */
+   uint32  ndoc;       /* number of tsvectors that contained the word */
+   uint32  nentry;     /* occurrences summed over those tsvectors (a word without positions counts as 1) */
+}  StatEntry;
+
+/*
+ * Word-statistics accumulator.  After the two header fields, data holds
+ * size StatEntry elements followed by the bytes of all words; see the
+ * STATPTR / STATSTRPTR macros below.
+ */
+typedef struct {
+   int4        len;    /* total size of the object in bytes */
+   int4        size;   /* number of StatEntry elements */
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat.  Macro arguments are parenthesised so that
+ * expressions can be passed safely, and the header fields are read through
+ * tsstat (declared above) rather than through an unrelated type.
+ */
+/* size of the fixed header: the len and size fields */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* total bytes needed for x entries plus lenstr bytes of word text */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* first StatEntry of the accumulator x */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* start of the word text, right after the last StatEntry */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* number of bytes of word text currently stored */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance (ranking)
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of each lexeme's position in the string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>     /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator: orders WordEntryPos items by ascending position.
+ *
+ * Equal positions compare as 0.  Returning a non-zero value for equal keys
+ * would make the comparison inconsistent (a > b and b > a at once), which
+ * the qsort() contract does not allow.
+ */
+static int
+comparePos(const void *a, const void *b)
+{
+   int         apos = ((const WordEntryPos *) a)->pos;
+   int         bpos = ((const WordEntryPos *) b)->pos;
+
+   if (apos == bpos)
+       return 0;
+   return (apos > bpos) ? 1 : -1;
+}
+
+/*
+ * Sort an array of l positions and squeeze out duplicates in place.
+ *
+ * Entries sharing a position are merged into one that keeps the largest
+ * weight seen.  Compaction stops early once MAXNUMPOS slots are filled or
+ * the position MAXENTRYPOS-1 has been stored.  Returns the number of
+ * entries kept at the front of the array.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l)
+{
+   int4        last = 0;       /* index of the last entry kept so far */
+   int4        scan;
+
+   if (l == 1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (scan = 1; scan < l; scan++)
+   {
+       if (a[scan].pos == a[last].pos)
+       {
+           /* same position: remember only the heavier weight */
+           if (a[scan].weight > a[last].weight)
+               a[last].weight = a[scan].weight;
+           continue;
+       }
+
+       last++;
+       a[last].pos = a[scan].pos;
+       a[last].weight = a[scan].weight;
+       if (last >= MAXNUMPOS - 1 || a[last].pos == MAXENTRYPOS - 1)
+           break;
+   }
+
+   return last + 1;
+}
+
+/* Text buffer that compareentry() resolves lexeme offsets against. */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN items: shorter lexemes sort first,
+ * lexemes of equal length are ordered by strncmp() over their bytes in
+ * BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *left = (const WordEntryIN *) a;
+   const WordEntryIN *right = (const WordEntryIN *) b;
+
+   if (left->entry.len != right->entry.len)
+       return (left->entry.len > right->entry.len) ? 1 : -1;
+
+   return strncmp(&BufferStr[left->entry.pos],
+                  &BufferStr[right->entry.pos],
+                  left->entry.len);
+}
+
+/*
+ * Sort the l parsed entries in a[] and merge duplicates in place.
+ *
+ * buf is the text buffer the entries' pos/len refer to.  Position lists of
+ * identical lexemes are concatenated and then sorted/deduplicated with
+ * uniquePos(); slot 0 of every position array holds the element count
+ * (accessed as a uint16).
+ *
+ * *outbuflen is set to the exact number of bytes the surviving entries
+ * need when serialized: SHORTALIGN(len) for each lexeme plus, for entries
+ * with positions, (count + 1) WordEntryPos-sized slots (the count slot and
+ * the positions).  The value is computed from zero; the previous contents
+ * of *outbuflen are ignored, and the count slot is included so that the
+ * caller's allocation cannot come out too small.
+ *
+ * Returns the number of unique entries left at the front of a[].
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   res = a;
+   *outbuflen = 0;
+
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen += (*(uint16*)(a->pos) + 1) * sizeof(WordEntryPos);
+       }
+       *outbuflen += SHORTALIGN(a->entry.len);
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* new lexeme: finalize the previous one and account for its size */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           /* source and destination coincide until the first duplicate is dropped */
+           if ( res != ptr )
+               memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* duplicate lexeme: fold its positions into the kept entry */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]),
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* kept entry had no positions yet: adopt the duplicate's array */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* finalize and account for the last kept entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/* States of the gettoken_tsvector() state machine */
+#define WAITWORD   1   /* skipping blanks, looking for the start of a lexeme */
+#define WAITENDWORD 2  /* inside an unquoted lexeme */
+#define WAITNEXTCHAR   3   /* previous character was a backslash */
+#define WAITENDCMPLX   4   /* inside a quoted ('...') lexeme */
+#define WAITPOSINFO    5   /* after the closing quote: ':' starts position info */
+#define INPOSINFO  6   /* a digit starting a position is required */
+#define WAITPOSDELIM   7   /* after a position: ',', weight letter, digit or end */
+
+/*
+ * Make sure state->word has room for the byte about to be stored at
+ * state->curpos: when the used length plus one reaches state->len the
+ * buffer is doubled with repalloc() and curpos is re-based onto the new
+ * buffer.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Read the next lexeme (and, unless state->oprisdelim is set, its optional
+ * ":pos[weight],..." list) from state->prsbuf.
+ *
+ * On success the NUL-terminated lexeme is in state->word, state->curpos
+ * points at its terminator and 1 is returned.  When position info was
+ * read, state->alen is non-zero and state->pos is a palloc'd array whose
+ * slot 0 holds the number of positions (as uint16).  Returns 0 when the
+ * input is exhausted.  Malformed input is reported with elog(ERROR).
+ *
+ * With state->oprisdelim set, operator characters (ISOPERATOR) and ':'
+ * terminate a lexeme and position info is not parsed.
+ *
+ * Characters are cast to unsigned char before being handed to the
+ * <ctype.h> functions: passing a negative char value is undefined
+ * behaviour, and lexemes routinely contain bytes with the high bit set.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;   /* state to resume after an escaped character */
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               /* first byte of a lexeme: the buffer is empty, so it always fits */
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               /* store the escaped character verbatim */
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               /* end of lexeme; the delimiter itself is left unconsumed */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1;
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               /* closing quote */
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   /* first position of this lexeme: create the array, slot 0 = count */
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2;
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           /* remaining digits of the number were already consumed by atoi() above */
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   /* not reached; keeps compilers that cannot see that quiet */
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type.
+ *
+ * Tokenizes the cstring argument with gettoken_tsvector(), gathers the
+ * lexemes in a scratch buffer, sorts/merges them with uniqueentry() and
+ * serializes the result: header, WordEntry array, then for every entry
+ * the SHORTALIGN-padded lexeme bytes followed by its position array.
+ *
+ * Reports an error when a lexeme is too long for the 11-bit length field
+ * or when an offset does not fit the 20-bit pos field of WordEntry
+ * (valid offsets are 0 .. MAXSTRPOS-1).
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array and the scratch text buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /* pos is a 20-bit field: MAXSTRPOS itself would wrap around to 0 */
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* empty value: no payload beyond the header */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /*
+        * Offsets in the final value also count the position arrays stored
+        * between lexemes, so they have to be range-checked again here.
+        */
+       if (cur - STRPTR(in) >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* slot 0 of the array is the count, hence the "+ 1" */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos );
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * SQL-callable: returns the size field (number of entries) of the
+ * tsvector argument.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before the detoasted copy may be released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type.
+ *
+ * Produces  'lexeme':pos[weight],...  items separated by single spaces.
+ * Quote and backslash characters inside a lexeme are both preceded by a
+ * backslash: gettoken_tsvector() treats backslash as the escape character,
+ * so leaving it unescaped would yield text that reads back as a different
+ * lexeme.
+ *
+ * The buffer is sized up front for the worst case: per entry two quotes
+ * and a separator, twice the lexeme length (every byte escaped), and 7
+ * bytes per position (up to 5 digits, a weight letter and a comma; the
+ * comma not needed after the last position pays for the ':').
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += ptr[i].len*2 /*for escape */;
+       if ( ptr[i].haspos )
+           lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           /* room for the escape byte is already part of lenbuf */
+           if (*curin == '\'' || *curin == '\\')
+               *curout++ = '\\';
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               /* weight 0 is the default and is not printed */
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0:
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD items: by length first, then by strncmp()
+ * of the word bytes, and finally by position.  Two words with the same
+ * text never compare as equal; the one whose position is not greater
+ * sorts first.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   WORD       *wa = (WORD *) a;
+   WORD       *wb = (WORD *) b;
+   int         cmp;
+
+   if (wa->len != wb->len)
+       return (wa->len > wb->len) ? 1 : -1;
+
+   cmp = strncmp(wa->word, wb->word, wb->len);
+   if (cmp != 0)
+       return cmp;
+
+   return (wa->pos.pos > wb->pos.pos) ? 1 : -1;
+}
+
+/*
+ * Sort the l parsed words in a[] and merge equal words in place.
+ *
+ * Every surviving entry gets a palloc'd pos.apos array in which slot 0 is
+ * the number of positions and the following slots are the positions,
+ * clamped with LIMITPOS.  The word strings of merged duplicates are
+ * pfree'd.  Returns the number of unique words left at the front of a[].
+ *
+ * NOTE(review): pos.pos is always copied to tmppos before pos.apos is
+ * assigned, which suggests the two members share storage -- confirm
+ * against the declaration of WORD.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   /* single word: nothing to sort, just convert its position to array form */
+   if (l == 1) {
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   /* convert the first (smallest) word to array form */
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* a new word: start the next output slot */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* repeated word: drop its string and append its position */
+           pfree(ptr->word);
+           /* stop collecting at MAXNUMPOS-1 positions or once MAXENTRYPOS-1 was stored */
+           if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
+               res->pos.apos[0]++; 
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * Deduplicates prs->words with uniqueWORD() and serializes them into a
+ * freshly palloc'd tsvector: header, WordEntry array, then for every word
+ * its SHORTALIGN-padded bytes followed (when it has positions) by a uint16
+ * count and the WordEntryPos items, all stored with weight 0.
+ *
+ * The word strings, the per-word position arrays and prs->words itself
+ * are pfree'd.  Reports an error when an offset does not fit the 20-bit
+ * pos field of WordEntry (valid offsets are 0 .. MAXSTRPOS-1).
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+   /* first pass: size of the text/position area */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   /* second pass: fill in the entries and copy the data */
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /* pos is a 20-bit field: MAXSTRPOS itself would wrap around to 0 */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+
+           ptr->haspos=1;
+           /* the count goes first; POSDATAPTR() locates the slots after it */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * SQL-callable: to_tsvector(cfg_id, text).
+ *
+ * Parses the text with the configuration found for the given id and
+ * returns the resulting tsvector; text that yields no words gives an
+ * empty tsvector (header only).
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *txt = PG_GETARG_TEXT_P(1);
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     prs;
+   tsvector   *result;
+
+   /* start with room for 32 words */
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+   PG_FREE_IF_COPY(txt, 1);
+
+   if (prs.curwords == 0)
+   {
+       /* nothing was parsed: build a header-only value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+       PG_RETURN_POINTER(result);
+   }
+
+   result = makevalue(&prs);
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * SQL-callable: to_tsvector(cfg_name, text).
+ *
+ * Translates the configuration name to its id with name2id_cfg() and
+ * hands the work over to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS)
+{
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector,
+                                cfgid,
+                                PG_GETARG_DATUM(1),
+                                (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * SQL-callable: to_tsvector(text).
+ *
+ * Same as to_tsvector() with the id returned by get_currcfg() as the
+ * configuration.
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS)
+{
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector,
+                                cfgid,
+                                PG_GETARG_DATUM(0),
+                                (Datum)0);
+
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * TEXTOID.
+ *
+ * Walks the list returned by FuncnameGetCandidates(), releasing every
+ * candidate, and returns the oid of the first match or InvalidOid when
+ * there is none.
+ */
+static Oid
+findFunc(char *fname)
+{
+   List       *names = makeList1(makeString(fname));
+   FuncCandidateList cand,
+               next;
+   Oid         funcid = InvalidOid;
+
+   cand = FuncnameGetCandidates(names, 1);
+   freeList(names);
+
+   while (cand != NULL)
+   {
+       next = cand->next;
+       /* only the first text-argument candidate is remembered */
+       if (funcid == InvalidOid && cand->args[0] == TEXTOID)
+           funcid = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return funcid;
+}
+
+/*
+ * Trigger
+ *
+ * Row-level BEFORE INSERT/UPDATE trigger that rebuilds a tsvector column.
+ * Trigger arguments: the name of the tsvector column first, then one or
+ * more names.  A name that is a column of the relation is parsed as text
+ * (text, varchar and bpchar columns only); a name that is not a column is
+ * looked up with findFunc() and the function found is applied to the
+ * value of every column processed after it (funcoid is never reset).
+ * The parsed words of all columns are combined into one tsvector which
+ * replaces the tsvector column in the returned tuple.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   /* refuse anything but a row-level BEFORE trigger call */
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* the first argument names the column that receives the tsvector */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: it must name a function taking one text argument */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /* detoasting made no copy: mark txt as not ours so it is not pfree'd below */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* release txt only when it is a detoasted copy */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a header-only tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Directory entry for one lexeme, packed into 32 bits:
+ *   haspos - set when a position list follows the lexeme bytes
+ *   len    - length of the lexeme in bytes
+ *   pos    - offset of the lexeme inside the data area (see STRPTR)
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* first values that no longer fit the len and pos bit fields */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme, packed into 16 bits: a 2-bit weight and a
+ * 14-bit position.
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+/* first value that no longer fits the pos bit field */
+#define MAXENTRYPOS    (1<<14)
+/* cap on the number of positions kept per lexeme */
+#define MAXNUMPOS  256
+/* clamp a position to the largest storable value; evaluates x twice */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * tsvector value: len is the total size in bytes, size the number of
+ * WordEntry items; data holds the WordEntry array followed by the lexeme
+ * bytes and position lists.
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Layout helpers for tsvector values.  Every macro argument is
+ * parenthesized so that expression arguments (for example "cur - data" or
+ * "base + 1") expand with the intended precedence.
+ */
+/* size of the fixed header (len and size fields) */
+#define DATAHDRSIZE (sizeof(int4)*2)
+/* total size of a value with x entries and lenstr bytes of lexeme/position data */
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+/* start of the WordEntry array */
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+/* start of the data area that follows the WordEntry array */
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* number of bytes in the data area */
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* address just past entry e's SHORTALIGN-padded lexeme: the uint16 position count */
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+/* number of positions stored for entry e (0 when it has none) */
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
+/* first WordEntryPos of entry e, right after the count */
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Entry as collected while parsing text input: the WordEntry plus a
+ * separately allocated position array (slot 0 of which is used as a
+ * uint16 element count).
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/* Working state of gettoken_tsvector() */
+typedef struct
+{
+   char       *prsbuf;     /* current read position in the input string */
+   char       *word;       /* buffer receiving the current lexeme */
+   char       *curpos;     /* next write position inside word */
+   int4        len;        /* allocated size of word */
+   int4        state;      /* current state of the state machine */
+   int4        alen;       /* allocated slots in pos; 0 = no position info read */
+   WordEntryPos    *pos;   /* positions of the current lexeme, slot 0 = count */
+   bool        oprisdelim; /* treat operator characters and ':' as lexeme delimiters */
+}  TI_IN_STATE;
+
+/* returns 1 when a lexeme was read into state->word, 0 at end of input */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>     /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * SQL-callable: returns a copy of the tsvector argument without any
+ * position information.  Lexemes keep their order; only the
+ * SHORTALIGN-padded lexeme bytes are copied and haspos is cleared in
+ * every entry.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* size of the data area once the position lists are gone */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * SQL-callable: setweight(tsvector, "char").
+ *
+ * Returns a copy of the tsvector in which every stored position carries
+ * the given weight.  The letter is case-insensitive: a=3, b=2, c=1, d=0;
+ * anything else raises "Unknown weight".
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* tolower() is only defined for unsigned char values (and EOF) */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;
+   while(i--) {
+       /* entries without positions have nothing to reweight */
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two lexemes that may belong to different tsvectors: ptra/ptrb
+ * are the data areas that a->pos and b->pos are offsets into.  A shorter
+ * lexeme sorts first; equal lengths are decided by strncmp() of the bytes.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position list
+ * of destptr (an entry of dest), adding maxpos to every position and
+ * clamping the sum with LIMITPOS.
+ *
+ * When destptr has no positions yet its count is initialized to 0 first.
+ * Copying stops when the destination holds MAXNUMPOS positions or its
+ * last position is already MAXENTRYPOS-1.  destptr->haspos is set when at
+ * least one position was added.  Returns the number of positions added.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos )
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight;
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1;
+   return  *clen - startlen;
+}
+
+
+/*
+ * SQL-callable: concatenation of two tsvectors.
+ *
+ * Merges the two entry lists, which are walked in compareEntry() order.
+ * Positions coming from the second argument are shifted by the largest
+ * position found in the first one.  The result is first laid out in a
+ * buffer sized for both inputs together and is compacted at the end, once
+ * the real number of entries is known.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* zero-filled buffer: add_pos() finds a count of 0 for fresh entries */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one entry, positions of in1 then in2 */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count slot is already in place, only the added positions take space */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* entries left over in in1 are copied unchanged */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* entries left over in in2 get their positions shifted by maxpos */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Shrink to the real number of entries.  STRPTR() depends on size, so
+    * the data area may now start earlier and has to be moved down.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- Be careful!
+-- This script drops all indices, triggers and columns that use the types
+-- defined in tsearch2.sql.
+
+
+-- GiST operator class for tsvector
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregate
+DROP AGGREGATE stat(tsvector);
+
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types; CASCADE also drops everything that depends on them
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- remaining functions, dropped explicitly
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the token
+ * type number defined in deflex.h (1 .. LASTNUM).  Entry 0 is a placeholder
+ * because token type numbers start at 1.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token type number
+ * defined in deflex.h (1 .. LASTNUM).  Entry 0 is a placeholder because
+ * token type numbers start at 1.  Must stay in step with lex_descr[].
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Token type numbers returned by the default word parser.
+ *
+ * Remember: LASTNUM must always equal the highest token type number below,
+ * and lex_descr[] / tok_alias[] need one entry per number (plus the unused
+ * entry 0), because they are indexed by these values.
+ */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* per-token-type description and alias tables */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/* FILE is used in the start_parse_fh() prototype */
+#include <stdio.h>
+
+/*
+ * Interface of the flex-generated default word parser (parser.l).
+ */
+
+/*
+ * Text of the most recently scanned token.  Only declared here: the single
+ * definition (with initializer) lives in parser.l, so every file including
+ * this header refers to the same object instead of defining its own.
+ */
+extern char *token;
+
+/*
+ * Length in bytes of the most recently scanned token.
+ * NOTE(review): this is still a tentative definition because no other
+ * definition of tokenlen is visible in parser.l; once one is added there,
+ * this line should become "extern int tokenlen;" as well.
+ */
+int            tokenlen;
+
+/* scan the next token; returns its type number, 0 at end of input */
+int            tsearch2_yylex(void);
+/* start scanning 'limit' bytes of the given string */
+void       start_parse_str(char *, int);
+/* start scanning a file handle; a limit of 0 means "no limit" */
+void       start_parse_fh(FILE *, int);
+/* release the scanner buffer created by start_parse_*() */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: every three-argument fprintf() in
+ * the generated scanner is turned into ts_error(ERROR, fmt, msg).
+ * NOTE(review): this relies on ts_error(ERROR, ...) not returning, so that
+ * the exit() following the message in the generated code is never reached.
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* use the postgres allocation functions inside the generated scanner */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the text of the current token */
+char *s     = NULL;  /* private copy used to return a WHOLE hyphenated word (or URL) */
+
+YY_BUFFER_STATE buf = NULL; /* scanner buffer currently in use; needed to parse from a string */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the file handle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next read request from the file handle */
+
+/*
+ * Redefine the input macro so that reads from a file handle honour lrlimit.
+ *
+ * Interactive buffers are read one character at a time up to and including
+ * a newline.  Otherwise up to max_size bytes are requested with fread(),
+ * capped by the remaining lrlimit; once lrlimit reaches 0 the macro reports
+ * end of input (YY_NULL).  Despite its indentation, the fread() statement is
+ * not part of the inner "else": it runs for both the limited and the
+ * unlimited case.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* exclusive scanner state used while splitting a hyphenated word into parts */
+%x DELIM  
+/* exclusive scanner states used while parsing a URL */
+%x URL  
+%x SERVER  
+
+/* exclusive scanner states used while skipping HTML tags, comments and scripts */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* bytes with the high bit set (octal 200-377) are treated as cyrillic koi8 letters */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+   /*
+    * HTML handling: script bodies, comments and tags.
+    *
+    * NOTE(review): the rules in this file appear to have lost text in this
+    * copy: some patterns are empty, one rule is reduced to a single
+    * character, and no rule carries a start-condition prefix although the
+    * states INTAG, QINTAG, INCOMMENT, INSCRIPT, DELIM, URL and SERVER are
+    * declared.  Confirm against the upstream parser.l before building.
+    */
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   /* back to normal scanning; the matched text is replaced by one blank, */
+   /* while tokenlen still reports the length of the original match */
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   /* same treatment as the rule above */
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   /* end of tag: report it as a one-character blank of type TAG */
+   /* (token is assigned twice; the first assignment is redundant) */
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   /* named HTML entity: quot, amp, nbsp, lt or gt */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   /* numeric HTML entity with one to three decimal digits */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   /* number with an exponent part introduced by e, E, d or D */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   /* at least three dot-separated numeric components, e.g. 1.2.3 */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   /* optionally signed number with a fractional part */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   /* integer with an explicit sign */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   /* integer without a sign */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   /* protocol prefix; scanning continues in state URL */
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   /* ftp prefixes are reported with the same token type as http ones */
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   /*
+    * Whole URL: return a private copy of it, then push the complete match
+    * back (yyless(0)) so that it is scanned again in state SERVER.
+    */
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   /* path (or port) part introduced by '/' or ':' */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   /* two path-like pieces joined by at least one slash */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   /*
+    * Return a private copy of the whole hyphenated word, then push the
+    * complete match back (yyless(0)) so that its parts are scanned again
+    * in state DELIM.
+    */
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+   /* same as above for words made of [[:alpha:]] characters */
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   /* same as above for mixed letters and digits */
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   /* at least three dot-separated numeric components */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   /* number with a fractional part; only a leading '+' is accepted here */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   /* the hyphen between two parts is reported as a blank */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   /* anything else ends the special state; rescan the character normally */
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   /* word made only of high-bit (cyrillic) characters */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   /* word made only of [[:alpha:]] characters */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   /* word mixing letters and digits */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   /* run of white space */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   /* any other single character is reported as a blank too */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: release the saved copy of a composite token
+ * and the scanner buffer.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Begin scanning the first 'limit' bytes of 'str'.
+ * A parse that is still active is finished first.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL )
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+/*
+ * Begin scanning the file handle 'fh', reading at most 'limit' bytes
+ * (a limit of 0 means unlimited).  A parse that is still active is
+ * finished first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL )
+       end_parse();
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+/* saved SPI plan of the pg_ts_parser lookup by oid; prepared on first use in init_prs() */
+static void *plan_getparser=NULL;
+/* oid of the current parser; InvalidOid until set_curprs() or the first *_current() call sets it */
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser whose pg_ts_parser row has
+ * oid 'id'.
+ *
+ * *prs is zeroed first.  The row is fetched through SPI with a plan that is
+ * prepared and saved on the first call.  The function oids found in the
+ * columns prs_start, prs_nexttoken, prs_end and prs_headline are resolved
+ * into FmgrInfo structures allocated in TopMemoryContext; prs_lextype is
+ * stored as a plain oid.
+ *
+ * Failures (plan preparation, execution, unknown id) are reported through
+ * ts_error(ERROR, ...).
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       /* column 1: prs_start */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       /* column 2: prs_nexttoken */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       /* column 3: prs_end */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       /* column 4: prs_lextype, kept as an oid and called by oid later */
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       /* column 5: prs_headline */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned, so it must be printed with %u, not %d */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/* cache of the parser descriptions loaded so far */
+typedef struct {
+   /* most recently returned entry of list, or NULL */
+   WParserInfo *last_prs;
+   /* number of entries of list in use */
+   int     len;
+   /* number of entries list has room for */
+   int     reallen;
+   /* array of parser descriptions, kept sorted by prs_id */
+   WParserInfo *list;
+   /* map from parser name to parser oid */
+   SNMap       name2id_map;
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget every cached parser: release the name->id map and the array of
+ * parser descriptions, then return PList to its empty state.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL )
+       free(PList.list);
+
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is an
+ * unsigned type, so the difference of two ids converted to int has the
+ * wrong sign whenever the ids are more than INT_MAX apart, which would make
+ * the ordering inconsistent.
+ *
+ * Returns a negative value, zero or a positive value when a's id is less
+ * than, equal to or greater than b's id.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached description of parser 'id', loading it with init_prs()
+ * on first use.
+ *
+ * The cache is an array kept sorted by prs_id so that it can be searched
+ * with bsearch(); PList.last_prs remembers the most recent hit.  The
+ * returned pointer refers into that array, so it is only valid until the
+ * next call that adds a parser (the array may be reallocated and is
+ * re-sorted).
+ *
+ * Errors are raised through ts_error(ERROR, ...), either here (out of
+ * memory) or inside init_prs() (unknown parser id).
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo key;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   key.prs_id=id;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* last chance: load the parser, growing the array when it is full */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /*
+    * Do not publish the new slot through PList.last_prs before it is fully
+    * initialised: if init_prs() raises an error, the slot is left zeroed
+    * and a later lookup of id 0 must not be answered with it.
+    */
+   PList.last_prs=NULL;
+   init_prs(id, &(PList.list[PList.len]));
+   PList.len++;
+
+   /* keep the array sorted; qsort moves entries, so look the new one up */
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return PList.last_prs;
+}
+
+/* saved SPI plan of the pg_ts_parser lookup by name; prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * Names already resolved are answered from PList.name2id_map; otherwise the
+ * table is queried through SPI and the result is remembered in the map.
+ * Failures, including an unknown name, are reported through
+ * ts_error(ERROR, ...).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid prsid;
+
+   /* known name? */
+   prsid=findSNMap_t( &(PList.name2id_map), name );
+   if ( prsid != 0 )
+       return prsid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed == 0 )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       prsid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   SPI_finish();
+
+   /* remember the answer for the next lookup */
+   addSNMap_t( &(PList.name2id_map), name, prsid );
+   return prsid;
+}
+
+
+/******sql-level interface******/
+/* per-query state of the token_type*() set-returning functions */
+typedef struct {
+   /* index of the next entry of list to return */
+   int     cur;
+   /* token type descriptions returned by the parser's lextype function; the entry with lexid 0 ends the list */
+   LexDescr    *list;
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type*() functions.
+ *
+ * Looks up parser 'prsid', asks its lextype function for the list of token
+ * types and stores that list, together with the tuple slot and attribute
+ * metadata of "tokentype", in the multi-call memory context of funcctx.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo *parser = findprs(prsid);
+   MemoryContext     saved;
+   TypeStorage     *state;
+   TupleDesc            desc;
+   Datum lexlist;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* iteration state, positioned on the first token type */
+   state=(TypeStorage*)palloc( sizeof(TypeStorage) );
+   state->cur=0;
+   lexlist = OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) );
+   state->list = (LexDescr*)DatumGetPointer(lexlist);
+   funcctx->user_fctx = (void*)state;
+
+   /* result rows have the layout of "tokentype" */
+   desc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+/*
+ * Per-call work shared by the token_type*() functions.
+ *
+ * Returns the next token type as a (id, alias, description) tuple and frees
+ * the alias and description strings of that entry.  When the list is
+ * exhausted the iteration state is released and (Datum)0 is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *state = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *item;
+   char    *values[3];
+   char    txtid[16];
+   HeapTuple    tuple;
+   Datum   result;
+
+   /* no list at all, or the terminating entry reached */
+   if ( state->list == NULL || state->list[state->cur].lexid == 0 ) {
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   item = &(state->list[state->cur]);
+   sprintf(txtid,"%d",item->lexid);
+   values[0]=txtid;
+   values[1]=item->alias;
+   values[2]=item->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   /* the strings were copied into the tuple and are not needed any more */
+   pfree(values[1]);
+   pfree(values[2]);
+   state->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: token types of the parser whose oid is passed
+ * as argument 0.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: token types of the parser whose name is passed
+ * as argument 0.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+       Oid prsid;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( name );
+       setup_firstcall(funcctx, prsid );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: token types of the current parser.  When no
+ * current parser has been chosen yet, the parser named "default" becomes
+ * the current one.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) {
+           text *defname = char2text("default");
+
+           current_parser_id = name2id_prs( defname );
+       }
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+/*
+ * Make the parser whose oid is passed as argument 0 the current parser.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        Oid prsid = PG_GETARG_OID(0);
+
+        /* load the parser first, so that an unknown oid is reported */
+        /* before the current parser is changed */
+        findprs(prsid);
+        current_parser_id = prsid;
+        PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+/*
+ * Make the parser whose name is passed as argument 0 the current parser.
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+        Oid prsid = name2id_prs(name);
+
+        /* the oid variant does the actual work */
+        DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+/* one token produced by a parser */
+typedef struct {
+   /* token type number as returned by the parser */
+   int type;
+   /* NUL-terminated copy of the token text */
+   char    *lexem;
+} LexemEntry;
+
+/* per-query state of the parse*() set-returning functions */
+typedef struct {
+   /* index of the next entry of list to return */
+   int cur;
+   /* while collecting: allocated size of list; afterwards: number of tokens */
+   int len;
+   /* collected tokens */
+   LexemEntry  *list;
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse*() functions.
+ *
+ * Runs parser 'prsid' over the whole of 'txt' and collects every token
+ * (type and a NUL-terminated copy of its text) into a PrsStorage allocated
+ * in the multi-call memory context of funcctx; prs_process_call() then
+ * returns the tokens one by one.  Also stores the tuple slot and attribute
+ * metadata of "tokenout" in funcctx.
+ *
+ * prsid is an Oid, like in setup_firstcall(), because that is what every
+ * caller passes and what findprs() expects.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* start with room for 16 tokens; the array is doubled when it fills up */
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* the start function gets the text without its varlena header */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* a token type of 0 marks the end of the input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* keep a private, NUL-terminated copy of the token text */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from now on len is the number of tokens and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call work shared by the parse*() functions.
+ *
+ * Returns the next collected token as a (type, text) tuple and frees the
+ * stored copy of its text.  When every token has been returned the
+ * iteration state is released and (Datum)0 is returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *state = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *item;
+   char    *values[2];
+   char    tid[16];
+   HeapTuple    tuple;
+   Datum   result;
+
+   /* nothing left to return */
+   if ( state->cur >= state->len ) {
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   item = &(state->list[state->cur]);
+   sprintf(tid,"%d",item->type);
+   values[0]=tid;
+   values[1]=item->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   /* the text was copied into the tuple and is not needed any more */
+   pfree(values[1]);
+   state->cur++;
+   return result;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: tokens of the text in argument 1, produced by
+ * the parser whose oid is passed as argument 0.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: tokens of the text in argument 1, produced by
+ * the parser whose name is passed as argument 0.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+       text *txt = PG_GETARG_TEXT_P(1);
+       Oid prsid;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( name );
+       prs_setup_firstcall(funcctx, prsid,txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: tokens of the text in argument 0, produced by
+ * the current parser.  When no current parser has been chosen yet, the
+ * parser named "default" becomes the current one.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) {
+           text *defname = char2text("default");
+
+           current_parser_id = name2id_prs( defname );
+       }
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * Build a headline for a text with respect to a query.
+ *
+ * Arguments: 0 - oid of the configuration, 1 - the text, 2 - the query,
+ * 3 - optional options text (treated as absent when fewer than four
+ * arguments are passed or when the pointer is NULL).
+ *
+ * The text is split into words by hlparsetext(), the headline function of
+ * the configuration's parser then works on that word list, and genhl()
+ * produces the text that is returned.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   /* start with room for 32 words */
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   /* let the parser's headline function work on the word list */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /* NOTE(review): startsel and stopsel are not allocated in this function; */
+   /* presumably the headline function sets them - confirm they are never NULL here */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+/*
+ * Variant of headline() that takes the name of the configuration as
+ * argument 0; the remaining arguments are passed through unchanged and a
+ * missing options argument is passed as a NULL pointer.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg=PG_GETARG_TEXT_P(0);
+   Datum cfgid;
+   Datum options;
+   Datum out;
+
+   cfgid = ObjectIdGetDatum(name2id_cfg( cfg ) );
+   if ( PG_NARGS()>3 )
+       options = PG_GETARG_DATUM(3);
+   else
+       options = PointerGetDatum(NULL);
+
+   out = DirectFunctionCall4(
+       headline,
+       cfgid,
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       options
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+/*
+ * Variant of headline() that uses the current configuration; arguments
+ * are the text, the query and, optionally, the options text (passed on as
+ * a NULL pointer when missing).
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum cfgid;
+   Datum options;
+   Datum out;
+
+   cfgid = ObjectIdGetDatum(get_currcfg());
+   if ( PG_NARGS()>2 )
+       options = PG_GETARG_DATUM(2);
+   else
+       options = PointerGetDatum(NULL);
+
+   out = DirectFunctionCall4(
+       headline,
+       cfgid,
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       options
+   );
+   PG_RETURN_DATUM(out);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* description of one parser, loaded from its pg_ts_parser row by init_prs() */
+typedef struct {
+   /* oid of the pg_ts_parser row */
+   Oid prs_id;
+   /* function from column prs_start: begins parsing a text */
+   FmgrInfo start_info;
+   /* function from column prs_nexttoken: returns the next token */
+   FmgrInfo getlexeme_info;
+   /* function from column prs_end: finishes parsing */
+   FmgrInfo end_info;
+   /* function from column prs_headline */
+   FmgrInfo headline_info;
+   /* oid of the function from column prs_lextype, which lists the token types */
+   Oid lextype;
+   /* value returned by the start function; passed back to the other parser functions */
+   void *prs;
+} WParserInfo;
+
+/* load the description of parser 'id' into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached description of parser 'id', loading it on first use */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* forget all cached parser descriptions */
+void   reset_prs(void);
+
+
+/* one token type as reported by a parser's lextype function */
+typedef struct {
+   /* token type number; 0 terminates a list of descriptions */
+   int lexid;
+   /* short name of the token type */
+   char    *alias;
+   /* human-readable description of the token type */
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+/*
+ * prsd_lextype: return a palloc'ed array describing the lexeme types
+ * 1..LASTNUM of the default parser.  Entry k-1 holds type k together with
+ * copies of tok_alias[k] and lex_descr[k]; one extra entry with lexid==0
+ * terminates the array (its alias/descr fields are left unset).
+ */
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *table=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   LexDescr *slot=table;
+   int lexid;
+
+   for(lexid=1;lexid<=LASTNUM;lexid++,slot++) {
+       slot->lexid = lexid;
+       slot->alias = pstrdup(tok_alias[lexid]);
+       slot->descr = pstrdup(lex_descr[lexid]);
+   }
+
+   /* slot now points at the extra, terminating entry */
+   slot->lexid=0;
+
+   PG_RETURN_POINTER(table);
+}
+
+/*
+ * prsd_start: begin parsing a buffer with the default parser.
+ * Argument 0 is the text buffer, argument 1 its int32 length; both are
+ * handed to start_parse_str().  Always returns a NULL pointer.
+ */
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char *buf=(char*)PG_GETARG_POINTER(0);
+   int32 buflen=PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+
+   PG_RETURN_POINTER(NULL);
+}
+
+/*
+ * prsd_getlexeme: fetch the next lexeme from the default parser.
+ * Runs tsearch2_yylex() and then stores the scanner's current `token`
+ * and `tokenlen` through the output pointers passed as arguments 1 and 2.
+ * Argument 0 is not used.  Returns the value of tsearch2_yylex().
+ */
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **lexeme_out=(char**)PG_GETARG_POINTER(1);
+   int *len_out=(int*)PG_GETARG_POINTER(2);
+   int lextype;
+
+   /* the scanner must run before token/tokenlen are read */
+   lextype=tsearch2_yylex();
+   *lexeme_out = token;
+   *len_out = tokenlen;
+
+   PG_RETURN_INT32(lextype);
+}
+
+/*
+ * prsd_end: finish a parse started with prsd_start() by calling
+ * end_parse().  Argument 0 is not used.  Returns void.
+ */
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+Datum 
+prsd_end(PG_FUNCTION_ARGS)
+{
+   end_parse();
+
+   PG_RETURN_VOID();
+}
+
+#define LEAVETOKEN(x)  ( (x)==12 ) /* x is a numeric token type; presumably the ids come from wordparser/deflex.h -- TODO confirm. Not referenced in this file. */
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 ) /* not referenced in this file */
+#define ENDPUNCTOKEN(x)    ( (x)==12 ) /* same test as LEAVETOKEN; not referenced in this file */
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 ) /* only used as part of NOENDTOKEN below */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 ) /* prsd_headline sets the word's `replace` flag for these types */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) ) /* types that prsd_headline does not count toward the headline's word count */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) ) /* types prsd_headline avoids as the last word of a headline */
+
+/*
+ * hlCheck: a window of `len` consecutive headline words starting at
+ * `words`.  It is the check value passed to checkcondition_HL().
+ */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * checkcondition_HL: report whether the query item `val` is referenced
+ * by any word of the hlCheck window passed as `checkval`.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window=(hlCheck*)checkval;
+   int k=0;
+
+   while ( k<window->len ) {
+       if ( window->words[k].item==val )
+           return true;
+       k++;
+   }
+
+   return false;
+}
+
+
+/*
+ * hlCover: find the next "cover" of the query in the parsed text, i.e. a
+ * range of words [*p, *q] on which the query evaluates to true.
+ *
+ * On entry *p is the word index at which the search starts.  On success
+ * the function returns true with *p / *q set to the first / last word of
+ * the cover.  On failure it returns false and the values left in *p and
+ * *q are meaningless.
+ *
+ * For each candidate range, *q is the largest among the first occurrences
+ * (at or after the start position) of the query's VAL items, and *p is
+ * the smallest among their last occurrences within [start, *q].  If the
+ * query is not satisfied on that range the search restarts one word after
+ * *p.
+ *
+ * -1 is used as the "nothing found" marker for *q, so that a cover ending
+ * at word index 0 is still recognised (0 is a valid word index).
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item;
+   int pos=*p;
+
+   for(;;) {
+       *q=-1;
+       *p=0x7fffffff;
+
+       /* right edge: furthest first occurrence of any query value */
+       item=GETQUERY(query);
+       for(j=0;j<query->size;j++,item++) {
+           if ( item->type != VAL )
+               continue;
+           for(i=pos;i<prs->curwords;i++) {
+               if ( prs->words[i].item == item ) {
+                   if ( i>*q )
+                       *q = i;
+                   break;
+               }
+           }
+       }
+
+       /* no query value occurs at or after pos */
+       if ( *q<0 )
+           return false;
+
+       /* left edge: nearest-to-start last occurrence within [pos, *q] */
+       item=GETQUERY(query);
+       for(j=0;j<query->size;j++,item++) {
+           if ( item->type != VAL )
+               continue;
+           for(i=*q;i>=pos;i--) {
+               if ( prs->words[i].item == item ) {
+                   if ( i<*p )
+                       *p=i;
+                   break;
+               }
+           }
+       }
+
+       if ( *p>*q )
+           return false;
+       else {
+           hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+           if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) )
+               return true;
+       }
+
+       /* query not satisfied on [*p, *q]: retry one word further right */
+       pos=*p+1;
+   }
+}
+
+/*
+ * prsd_headline: headline callback of the default parser.
+ *
+ * Arguments (all passed as raw pointers):
+ *   0: HLPRSTEXT* - the parsed text; the per-word flags and the
+ *      startsel/stopsel fields are filled in here
+ *   1: text* option string, or NULL to use the defaults
+ *   2: QUERYTYPE* - the query whose covers are located with hlCover()
+ *
+ * Options (keys compared case-insensitively): MaxWords (default 35),
+ * MinWords (15), ShortWord (3), StartSel ("<b>"), StopSel ("</b>").
+ * When options are given, MinWords < MaxWords, MinWords > 0 and
+ * ShortWord >= 0 are enforced with elog(ERROR).
+ *
+ * Every cover reported by hlCover() is stretched or shrunk towards the
+ * MinWords..MaxWords range, preferring to end on a word that is neither a
+ * NOENDTOKEN type nor ShortWord characters or shorter.  The candidate
+ * with the most distinct query words wins.  If the query has no cover at
+ * all, the headline is simply the beginning of the text.
+ *
+ * Returns the HLPRSTEXT pointer that was passed in.
+ */
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* from opt + start and end tag */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       /* the map is terminated by an entry without a key */
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       /* bestlen<0 implies poslen>=bestlen, so words[beste] is only read once beste is valid */
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           /* i is q+1 here; restart at q, which was already counted above */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           /* never start past the cover: words[q+1] may not exist */
+           if ( i>q )
+               i=q;
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this candidate if it is the first one, or it has more
+        * query words and a good end, or it has a good end while the
+        * current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: use the first min_words words of the text */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words that belong to the chosen headline */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* default highlighting tags when the options did not set them */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom}}}}}}}
+    "http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/tsearch-V2-intro.html">
+    [Online version] of this document is available.
+
+    The tsearch2 module is available to add as an extension to
+    the PostgreSQL database to allow for Full Text Indexing. This
+    document is an introduction to installing, configuring, using
+    and maintaining the database with the tsearch2 module
+    activated.
+
+    Please, note, tsearch2 module is fully incompatible with old
+    tsearch, which is deprecated in 7.4 and will be obsoleted in
+    7.5.
+
+    USING TSEARCH2 AND POSTGRESQL FOR A WEB BASED SEARCH
+    ENGINE
+
+    This documentation is provided as a short guide on how to
+    quickly get up and running with tsearch2 and PostgreSQL, for
+    those who want to implement a full text indexed based search
+    engine. It is not meant to be a complete in-depth guide into
+    the full ins and outs of the contrib/tsearch2 module, and is
+    primarily aimed at beginners who want to speed up searching of
+    large text fields, or those migrating from other database
+    systems such as MS-SQL.
+
+    The README.tsearch2 file included in the contrib/tsearch2
+    directory contains a brief overview and history behind tsearch.
+    This can also be found online
+    <a href="http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">[right
+    here]</a>.
+
+    Further in depth documentation such as a full function
+    reference, and user guide can be found online at the
+    <a href="http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/">[tsearch
+    documentation home]</a>.
+
+    ACKNOWLEDGEMENTS
+
+    Robert John Shepherd originally wrote this documentation for
+    the previous version of tsearch module (v1) included with the
+    postgres release. I took his documentation and updated it to
+    comply with the tsearch2 modifications.
+
+    Robert's original acknowledgements:
+
+    "Thanks to Oleg Bartunov for taking the time to answer many
+    of my questions regarding this module, and also to Teodor
+    Sigaev for clearing up the process of making your own
+    dictionaries. Plus of course a big thanks to the pair of them
+    for writing this module in the first place!"
+
+    I would also like to extend my thanks to the developers, and
+    Oleg Bartunov for all of his direction and help with the new
+    features of tsearch2.
+
+    OVERVIEW
+
+    MS-SQL provides a full text indexing (FTI) system which
+    enables the fast searching of text based fields, very useful
+    for websites (and other applications) that require a results
+    set based on key words. PostgreSQL ships with a contributed
+    module called tsearch2, which implements a special type of
+    index that can also be used for full text indexing. Further
+    more, unlike MS' offering which requires regular incremental
+    rebuilds of the text indexes themselves, tsearch2 indexes are
+    always up-to-date and keeping them so induces very little
+    overhead.
+
+    Before we get into the details, it is recommended that you
+    have installed and tested PostgreSQL, are reasonably familiar
+    with databases, the SQL query language and also understand the
+    basics of connecting to PostgreSQL from the local shell. This
+    document isn't intended for the complete PostgreSQL newbie, but
+    anyone with a reasonable grasp of the basics should be able to
+    follow it.
+
+    INSTALLATION
+
+    Starting with PostgreSQL version 7.4 tsearch2 is now
+    included in the contrib directory with the PostgreSQL sources.
+    contrib/tsearch2 is where you will find everything needed to
+    install and use tsearch2. Please note that tsearch2 will also
+    work with PostgreSQL version 7.3.x, but it is not the module
+    included with the source distribution. You will have to
+    download the module separately and install it in the same
+    fashion.
+
+    I installed the tsearch2 module to a PostgreSQL 7.3 database
+    from the contrib directory without squashing the original (old)
+    tsearch module. What I did was move the module's tsearch src
+    directory into the contrib tree under the name tsearchV2.
+
+    Step one is to download the tsearch V2 module:
+
+    
+    <a href="http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">[http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/]</a>
+    (check Development History for the latest stable version!)
+    
+        tar -zxvf tsearch-v2.tar.gz
+        mv tsearch2 PGSQL_SRC/contrib/
+        cd PGSQL_SRC/contrib/tsearch2
+
+
+    If you are installing from PostgreSQL version 7.4 or higher,
+    you can skip those steps and just change to the
+    contrib/tsearch2 directory in the source tree and continue from
+    there.
+
+    Then continue with the regular building and installation
+    process
+    
+        gmake
+        gmake install
+        gmake installcheck
+
+
+    That is pretty much all you have to do, unless of course you
+    get errors. However, if you get those, you had better go check
+    with the mailing lists over at
+    <a href="http://www.postgresql.org">http://www.postgresql.org</a> or
+    <a href="http://openfts.sourceforge.net/">http://openfts.sourceforge.net/</a>
+    since it's never failed for me.
+
+    The directory in contrib/ and the directory from the
+    archive are both called tsearch2. Tsearch2 is completely
+    incompatible with the previous version of tsearch. This means
+    that both versions can be installed into a single database, and
+    migration to the new version may be much easier.
+
+    NOTE: the previous version of tsearch found in the
+    contrib/tsearch directory is deprecated. Although it is still
+    available and included within PostgreSQL version 7.4, it will
+    be removed in version 7.5.
+
+    ADDING TSEARCH2 FUNCTIONALITY TO A DATABASE
+
+    We should create a database to use as an example for the
+    remainder of this file. We can call the database "ftstest". You
+    can create it from the command line like this:
+    
+        #createdb ftstest
+
+
+    If you thought installation was easy, this next bit is even
+    easier. Change to the PGSQL_SRC/contrib/tsearch2 directory and
+    type:
+    
+        psql ftstest < tsearch2.sql
+
+
+    The file "tsearch2.sql" holds all the wonderful little
+    goodies you need to do full text indexing. It defines numerous
+    functions and operators, and creates the needed tables in the
+    database. Four new tables are created by running the
+    tsearch2.sql file: pg_ts_dict, pg_ts_parser, pg_ts_cfg and
+    pg_ts_cfgmap.
+
+    You can check out the tables if you like:
+    
+        #psql ftstest
+        ftstest=# \d
+                    List of relations
+         Schema |     Name     | Type  |  Owner
+        --------+--------------+-------+----------
+         public | pg_ts_cfg    | table | kopciuch
+         public | pg_ts_cfgmap | table | kopciuch
+         public | pg_ts_dict   | table | kopciuch
+         public | pg_ts_parser | table | kopciuch
+        (4 rows)
+
+
+    TYPES AND FUNCTIONS PROVIDED BY TSEARCH2
+
+    The first thing we can do is try out some of the types that
+    are provided for us. Lets look at the tsvector type provided
+    for us:
+    
+        SELECT 'Our first string used today'::tsvector;
+                        tsvector
+        ---------------------------------------
+         'Our' 'used' 'first' 'today' 'string'
+        (1 row)
+
+
+    The results are the words used within our string. Notice
+    they are not in any particular order. The tsvector type returns
+    a string of space separated words.
+    
+        SELECT 'Our first string used today first string'::tsvector;
+                        tsvector
+        ---------------------------------------
+         'Our' 'used' 'first' 'today' 'string'
+        (1 row)
+
+
+    Notice the results string has each unique word ('first' and
+    'string' only appear once in the tsvector value). Which of
+    course makes sense if you are searching the full text ... you
+    only need to know each unique word in the text.
+
+    Those examples were just casting a text field to that of
+    type tsvector. Lets check out one of the new functions created
+    by the tsearch2 module.
+
+    The function to_tsvector has 3 possible signatures:
+    
+        to_tsvector(oid, text);
+        to_tsvector(text, text);
+        to_tsvector(text);
+
+
+    We will use the second method using two text fields. The
+    overloaded methods provide us with a way to specify the way
+    the searchable text is broken up into words (Stemming process).
+    Right now we will specify the 'default' configuration. See the
+    section on TSEARCH2 CONFIGURATION to learn more about this.
+    
+        SELECT to_tsvector('default',
+                           'Our first string used today first string');
+                        to_tsvector
+        --------------------------------------------
+         'use':4 'first':2,6 'today':5 'string':3,7
+        (1 row)
+
+
+    The result returned from this function is of type tsvector.
+    The results came about by this reasoning: All of the words in
+    the text passed in are stemmed, or not used because they are
+    stop words defined in our configuration. Each lower case
+    morphed word is returned with all of its positions in the
+    text.
+
+    In this case the word "Our" is a stop word in the default
+    configuration. That means it will not be included in the
+    result. The word "first" is found at positions 2 and 6
+    (although "Our" is a stop word, its position is maintained).
+    The word(s) positioning is maintained exactly as in the
+    original string. The word "used" is morphed to the word "use"
+    based on the default configuration for word stemming, and is
+    found at position 4. The rest of the results follow the same
+    logic. Just a reminder again ... the order of the 'word'
+    position in the output is not in any kind of order. (ie 'use':4
+    appears first)
+
+    If you want to view the output of the tsvector fields
+    without their positions, you can do so with the function
+    "strip(tsvector)".
+    
+        SELECT strip(to_tsvector('default',
+                     'Our first string used today first string'));
+                    strip
+        --------------------------------
+         'use' 'first' 'today' 'string'
+
+
+    If you wish to know the number of unique words returned in
+    the tsvector you can do so by using the function
+    "length(tsvector)"
+    
+        SELECT length(to_tsvector('default',
+                      'Our first string used today first string'));
+         length
+        --------
+              4
+        (1 row)
+
+
+    Let's take a look at the function to_tsquery. It also has 3
+    signatures which follow the same rationale as the to_tsvector
+    function:
+    
+        to_tsquery(oid, text);
+        to_tsquery(text, text);
+        to_tsquery(text);
+
+
+    Lets try using the function with a single word :
+    
+        SELECT to_tsquery('default', 'word');
+         to_tsquery
+        -----------
+         'word'
+         (1 row)
+
+
+    I call the function the same way I would a to_tsvector
+    function, specifying the 'default' configuration for morphing,
+    and the result is the stemmed output 'word'.
+
+    Lets attempt to use the function with a string of multiple
+    words:
+    
+        SELECT to_tsquery('default', 'this is many words');
+        ERROR:  Syntax error
+
+
+    The function can not accept a space separated string. The
+    intention of the to_tsquery function is to return a type of
+    "tsquery" used for searching a tsvector field. What we need to
+    do is search for one to many words with some kind of logic (for
+    now simple boolean).
+    
+        SELECT to_tsquery('default', 'searching|sentence');
+              to_tsquery
+        ----------------------
+         'search' | 'sentenc'
+        (1 row)
+
+
+    Notice that the words are separated by the boolean logic
+    "OR", the text could contain boolean operators &,|,!,()
+    with their usual meaning.
+
+    You can not use words defined as being a stop word in your
+    configuration. The function will not fail ... you will just get
+    no result, and a NOTICE like this:
+    
+        SELECT to_tsquery('default', 'a|is&not|!the');
+        NOTICE:  Query contains only stopword(s)
+                 or doesn't contain lexem(s), ignored
+         to_tsquery
+        -----------
+        (1 row)
+
+
+    That is a beginning to using the types, and functions
+    defined in the tsearch2 module. There are numerous more
+    functions that I have not touched on. You can read through the
+    tsearch2.sql file built when compiling to get more familiar
+    with what is included.
+
+    INDEXING FIELDS IN A TABLE
+
+    The next stage is to add a full text index to an existing
+    table. In this example we already have a table defined as
+    follows:
+    
+        CREATE TABLE tblMessages
+        (
+                intIndex        int4,
+                strTopic        varchar(100),
+                strMessage      text
+        );
+
+
+    We are assuming there are several rows with some kind of
+    data in them. Any data will do, just do several inserts with
+    test strings for a topic, and a message. Here is some test data
+    I inserted. (Yes, I know it's completely useless stuff ;-) but
+    it will serve our purpose right now).
+    
+        INSERT INTO tblMessages
+               VALUES ('1', 'Testing Topic', 'Testing message data input');
+        INSERT INTO tblMessages
+               VALUES ('2', 'Movie', 'Breakfast at Tiffany\'s');
+        INSERT INTO tblMessages
+               VALUES ('3', 'Famous Author', 'Stephen King');
+        INSERT INTO tblMessages
+               VALUES ('4', 'Political Topic',
+                            'Nelson Mandella is released from prison');
+        INSERT INTO tblMessages
+               VALUES ('5', 'Nursery rhyme phrase',
+                            'Little jack horner sat in a corner');
+        INSERT INTO tblMessages
+               VALUES ('6', 'Gettysburg address quotation',
+                            'Four score and seven years ago'
+                            ' our fathers brought forth on this'
+                            ' continent a new nation, conceived in'
+                            ' liberty and dedicated to the proposition'
+                            ' that all men are created equal');
+        INSERT INTO tblMessages
+               VALUES ('7', 'Classic Rock Bands',
+                            'Led Zeppelin Grateful Dead and The Sex Pistols');
+        INSERT INTO tblMessages
+               VALUES ('8', 'My birth address',
+                            '18 Sommervile road, Regina, Saskatchewan');
+        INSERT INTO tblMessages
+               VALUES ('9', 'Joke', 'knock knock : who\'s there?'
+                                    ' I will not finish this joke');
+        INSERT INTO tblMessages
+               VALUES ('10', 'Computer information',
+                             'My computer is a pentium III 400 mHz'
+                             ' with 192 megabytes of RAM');
+
+
+    The next stage is to create a special text index which we
+    will use for FTI, so we can search our table of messages for
+    words or a phrase. We do this using the SQL command:
+    
+        ALTER TABLE tblMessages ADD idxFTI tsvector;
+
+
+    Note that unlike traditional indexes, this is actually a new
+    field in the same table, which is then used (through the magic
+    of the tsearch2 operators and functions) by a special index we
+    will create in a moment.
+
+    The general rule for the initial insertion of data will
+    follow four steps:
+    
+    1. update table
+    2. vacuum full analyze
+    3. create index
+    4. vacuum full analyze
+
+
+    The data can be updated into the table, the vacuum full
+    analyze will reclaim unused space. The index can be created on
+    the table after the data has been inserted. Having the index
+    created prior to the update will slow down the process. It can
+    be done in that manner, this way is just more efficient. After
+    the index has been created on the table, vacuum full analyze is
+    run again to update postgres's statistics (ie having the index
+    take effect).
+    
+        UPDATE tblMessages SET idxFTI=to_tsvector('default', strMessage);
+        VACUUM FULL ANALYZE;
+
+
+    Note that this only inserts the field strMessage as a
+    tsvector, so if you want to also add strTopic to the
+    information stored, you should instead do the following, which
+    effectively concatenates the two fields into one before being
+    inserted into the table:
+    
+        UPDATE tblMessages
+            SET idxFTI=to_tsvector('default',coalesce(strTopic,'') ||' '|| coalesce(strMessage,''));
+        VACUUM FULL ANALYZE;
+
+
+    Using the coalesce function makes sure this
+    concatenation also works with NULL fields.
+
+    We need to create the index on the column idxFTI. Keep in
+    mind that the database will update the index when some action
+    is taken. In this case we _need_ the index (The whole point of
+    Full Text INDEXING ;-)), so don't worry about any indexing
+    overhead. We will create an index based on the gist function.
+    GiST is an index structure for Generalized Search Tree.
+    
+        CREATE INDEX idxFTI_idx ON tblMessages USING gist(idxFTI);
+        VACUUM FULL ANALYZE;
+
+
+    After you have converted all of your data and indexed the
+    column, you can select some rows to see what actually happened.
+    I will not display output here but you can play around
+    yourselves and see what happened.
+
+    The last thing to do is set up a trigger so every time a row
+    in this table is changed, the text index is automatically
+    updated. This is easily done using:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, strMessage);
+
+
+    Or if you are indexing both strMessage and strTopic you
+    should instead do:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE
+                tsearch2(idxFTI, strTopic, strMessage);
+
+
+    Before you ask, the tsearch2 function accepts multiple
+    fields as arguments so there is no need to concatenate the two
+    into one like we did before.
+
+    If you want to do something specific with columns, you may
+    write your very own trigger function using plpgsql or other
+    procedural languages (but not SQL, unfortunately) and use it
+    instead of tsearch2 trigger.
+
+    You could however call other stored procedures from within
+    the tsearch2 function. Let's say we want to create a function to
+    remove certain characters (like the @ symbol) from all
+    text.
+    
+       CREATE FUNCTION dropatsymbol(text) 
+                     RETURNS text AS 'select replace($1, \'@\', \' \');' LANGUAGE SQL;
+
+
+    Now we can use this function within the tsearch2 function on
+    the trigger.
+    
+        DROP TRIGGER tsvectorupdate ON tblmessages;
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, dropatsymbol, strMessage);
+        INSERT INTO tblmessages VALUES (69, 'Attempt for dropatsymbol', 'test@test.com');
+
+
+    If at this point you receive an error stating: ERROR: Can't
+    find tsearch config by locale
+
+    Do not worry. You have done nothing wrong. And tsearch2 is
+    not broken. All that has happened here is that the
+    configuration is setup to use a configuration based on the
+    locale of the server. All you have to do is change your default
+    configuration, or add a new one for your specific locale. See
+    the section on TSEARCH2 CONFIGURATION.
+    
+        SELECT * FROM tblmessages WHERE intindex = 69;
+
+         intindex |         strtopic         |  strmessage   |        idxfti
+        ----------+--------------------------+---------------+-----------------------
+               69 | Attempt for dropatsymbol | test@test.com | 'test':1 'test.com':2
+        (1 row)
+
+    Notice that the string content was passed through the stored
+    procedure dropatsymbol. The '@' character was replaced with a
+    single space ... and the output from the procedure was then
+    stored in the tsvector column.
+
+    This could be useful for removing other characters from
+    indexed text, or any kind of preprocessing needed to be done on
+    the text prior to insertion into the index.
+
+    QUERYING A TABLE
+
+    There are some examples in the README.tsearch2 file for
+    querying a table. One major difference between tsearch and
+    tsearch2 is the operator ## is no longer available. Only the
+    operator @@ is defined, using the types tsvector on one side
+    and tsquery on the other side.
+
+    Let's search the indexed data for the word "Test". I indexed
+    based on the concatenation of the strTopic, and the
+    strMessage:
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'test'::tsquery;
+         intindex |   strtopic
+        ----------+---------------
+                1 | Testing Topic
+        (1 row)
+
+
+    The only result that matched was the row with a topic
+    "Testing Topic". Notice that the word I search for was all
+    lowercase. Let's see what happens when I query for uppercase
+    "Test".
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'Test'::tsquery;
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    We get zero rows returned. The reason is that when the
+    text was inserted, it was morphed to my default configuration
+    (because of the call to to_tsvector in the UPDATE statement).
+    If there was no morphing done, and the tsvector field(s)
+    contained the word 'Test', a match would have been found.
+
+    Most likely the best way to query the field is to use the
+    to_tsquery function on the right hand side of the @@ operator
+    like this:
+    
+        SELECT intindex, strtopic FROM tblmessages
+               WHERE idxfti @@ to_tsquery('default', 'Test | Zeppelin');
+         intindex |      strtopic
+        ----------+--------------------
+                1 | Testing Topic
+                7 | Classic Rock Bands
+        (2 rows)
+
+
+    That query searched for all instances of "Test" OR
+    "Zeppelin". It returned two rows: the "Testing Topic" row, and
+    the "Classic Rock Bands" row. The to_tsquery function performed
+    the correct morphology upon the parameters, and searched the
+    tsvector field appropriately.
+
+    The last example here relates to searching for a phrase, for
+    example "minority report". This poses a problem with regard to
+    tsearch2, as it doesn't index phrases, only words. But there is
+    a way around which doesn't appear to have a significant impact
+    on query time, and that is to use a query such as the
+    following:
+    
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*men are created equal.*';
+         intindex |           strtopic
+        ----------+------------------------------
+                6 | Gettysburg address quotation
+        (1 row)
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*something that does not exist.*';
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    Of course if you're indexing both strTopic and strMessage, and
+    want to search for this phrase on both, then you will have to
+    get out the brackets and extend this query a little more.
+
+    TSEARCH2 CONFIGURATION
+
+    Some words such as "and", "the", and "who" are automatically
+    not indexed, since they belong to a pre-existing dictionary of
+    "Stop Words" which tsearch2 does not perform indexing on. If
+    someone needs to search for "The Who" in your database, they
+    are going to have a tough time coming up with any results,
+    since both are ignored in the indexes. But there is a
+    solution.
+
+    Let's say we want to add a word into the stop word list for
+    English stemming. We could edit the file
+    '/usr/local/pgsql/share/english.stop' and add a word to the
+    list. I edited mine to exclude my name from indexing:
+    
+    - Edit /usr/local/pgsql/share/english.stop
+    - Add 'andy' to the list
+    - Save the file.
+
+
+    When you connect to the database, the dict_init procedure is
+    run during initialization. And in my configuration it will read
+    the stop words from the file I just edited. If you were
+    connected to the DB while editing the stop words, you will need
+    to end the current session and re-connect. When you re-connect
+    to the database, 'andy' is no longer indexed:
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+        (1 row)
+
+
+    Originally I would get the result :
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+         'andi':1
+        (1 row)
+
+
+    But since I added it as a stop word, it is now ignored during
+    indexing. The stop word added was used in the dictionary
+    "en_stem". If I were to use a different configuration such as
+    'simple', the results would be different. There are no stop
+    words for the simple dictionary. It will just convert to lower
+    case, and index every unique word.
+    
+        SELECT to_tsvector('simple', 'Andy andy The the in out');
+                     to_tsvector
+        -------------------------------------
+         'in':5 'out':6 'the':3,4 'andy':1,2
+        (1 row)
+
+
+    All this talk about which configuration to use is leading us
+    into the actual configuration of tsearch2. In the examples in
+    this document the configuration has always been specified when
+    using the tsearch2 functions:
+    
+        SELECT to_tsvector('default', 'Testing the default config');
+        SELECT to_tsvector('simple', 'Example of simple Config');
+
+
+    The pg_ts_cfg table holds each configuration you can use
+    with the tsearch2 functions. As you can see, the ts_name column
+    contains the 'default' configurations, each tied to a locale
+    ('C' and 'ru_RU.KOI8-R'), as well as the 'simple'
+    configuration, which is not based on any locale.
+    
+        SELECT * from pg_ts_cfg;
+             ts_name     | prs_name |    locale
+        -----------------+----------+--------------
+         default         | default  | C
+         default_russian | default  | ru_RU.KOI8-R
+         simple          | default  |
+        (3 rows)
+
+
+    Each row in the pg_ts_cfg table contains the name of the
+    tsearch2 configuration, the name of the parser to use, and the
+    locale mapped to the configuration. There is only one parser to
+    choose from the table pg_ts_parser called 'default'. More
+    parsers could be written, but for our needs we will use the
+    default.
+
+    There are 3 configurations installed by tsearch2 initially.
+    If your locale is set to 'en_US' for example (like my laptop),
+    then as you can see there is currently no dictionary configured
+    to use with that locale. You can either set up a new
+    configuration or just use one that already exists. If I do not
+    specify which configuration to use in the to_tsvector function,
+    I receive the following error.
+    
+        SELECT to_tsvector('learning tsearch is like going to school');
+        ERROR:  Can't find tsearch config by locale
+
+
+    We will create a new configuration for use with the server
+    locale 'en_US'. The first step is to add a new configuration
+    into the pg_ts_cfg table. We will call the configuration
+    'default_english', with the default parser and use the locale
+    'en_US'.
+    
+        INSERT INTO pg_ts_cfg (ts_name, prs_name, locale)
+               VALUES ('default_english', 'default', 'en_US');
+
+
+    We have only declared that there is a configuration called
+    'default_english'. We need to set the configuration of how
+    'default_english' will work. The next step is creating a new
+    dictionary to use. The configuration of the dictionary is
+    completely different in tsearch2. In prior versions, to
+    make changes you would have to re-compile them into
+    tsearch.so. All of the configuration has now been moved
+    into the system tables created by executing the SQL code from
+    tsearch2.sql.
+
+    Let's take a first look at the pg_ts_dict table:
+    
+        ftstest=# \d pg_ts_dict
+                Table "public.pg_ts_dict"
+         Column      |  Type   | Modifiers
+        -----------------+---------+-----------
+         dict_name       | text    | not null
+         dict_init       | oid     |
+         dict_initoption | text    |
+         dict_lemmatize  | oid     | not null
+         dict_comment    | text    |
+        Indexes: pg_ts_dict_idx unique btree (dict_name)
+
+
+    The dict_name column is the name of the dictionary, for
+    example 'simple', 'en_stem' or 'ru_stem'. The dict_init column
+    is an OID of a stored procedure to run for initialization of
+    that dictionary, for example 'snb_en_init' or 'snb_ru_init'.
+    The dict_initoption column holds the options passed to the init
+    function of the dictionary. In the cases of 'en_stem' or
+    'ru_stem' it is a path to a stopword file for that dictionary,
+    for example '/usr/local/pgsql/share/english.stop'. This is
+    however dictated by the dictionary. ISpell dictionaries may
+    require different options. The dict_lemmatize column is another
+    OID of a stored procedure: the function used to lemmatize,
+    for example 'snb_lemmatize'. The dict_comment column is just a
+    comment.
+
+    Next we will configure the use of a new dictionary based on
+    ISpell. We will assume you have ISpell installed on your
+    machine (in /usr/local/lib).
+
+    First let's register the dictionary(ies) to use from ISpell.
+    We will use the English dictionary from ISpell. We insert the
+    paths to the relevant ISpell dictionary (*.hash) and affixes
+    (*.aff) files. There seems to be some question as to which
+    ISpell files are to be used. I installed ISpell from the latest
+    sources on my computer. The installation installed the
+    dictionary files with an extension of *.hash. Some
+    installations install with an extension of *.dict. As far as I
+    know the two extensions are equivalent. So *.hash ==
+    *.dict.
+
+    We will also continue to use the English stop word file that
+    was installed for the en_stem dictionary. You could use a
+    different one if you like. The ISpell configuration is based on
+    the "ispell_template" dictionary installed by default with
+    tsearch2. We will use the OIDs to the stored procedures from
+    the row where the dict_name = 'ispell_template'.
+    
+        INSERT INTO pg_ts_dict
+               (SELECT 'en_ispell',
+                       dict_init,
+                       'DictFile="/usr/local/lib/english.hash",'
+                       'AffFile="/usr/local/lib/english.aff",'
+                       'StopFile="/usr/local/pgsql/share/english.stop"',
+                       dict_lexize
+                FROM pg_ts_dict
+                WHERE dict_name = 'ispell_template');
+
+
+    Next we need to set up the configuration for mapping the
+    dictionary use to the lexeme types. This will be done by
+    altering the pg_ts_cfgmap table. We will insert several rows,
+    specifying the use of the new dictionary we installed and
+    configured for use within tsearch2. There are several types of
+    lexemes for which we want to force the use of the ISpell
+    dictionary.
+    
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lhword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lpart_hword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lword', '{en_ispell,en_stem}');
+
+
+    We have just inserted 3 records into the configuration
+    mapping, specifying that the lexeme types "lhword,
+    lpart_hword and lword" are to be stemmed using the 'en_ispell'
+    dictionary we added into pg_ts_dict, when using the
+    configuration 'default_english' which we added to
+    pg_ts_cfg.
+
+    There are several other lexeme types used that we do not need
+    to specify as using the ISpell dictionary. We can simply insert
+    values using the 'simple' stemming process dictionary.
+    
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'url', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'host', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'sfloat', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uri', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'int', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'float', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'email', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'word', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlpart_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'part_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlhword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'file', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uint', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'version', '{simple}');
+
+
+    Our addition of a configuration for 'default_english' is now
+    complete. We have successfully created a new tsearch2
+    configuration. At the same time we have also set the new
+    configuration to be our default for en_US locale.
+    
+        SELECT to_tsvector('default_english',
+                           'learning tsearch is like going to school');
+                           to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        SELECT to_tsvector('learning tsearch is like going to school');
+                            to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        (1 row)
+
+
+    If you already have a configuration set for the locale and
+    you are changing it to your new dictionary configuration,
+    you will have to set the old locale to NULL. If
+    we are using the 'C' locale then we would do this:
+    
+        UPDATE pg_ts_cfg SET locale=NULL WHERE locale = 'C';
+
+
+    That about wraps up the configuration of tsearch2. There is
+    much more you can do with the tables provided. This was just an
+    introduction to get things working rather quickly.
+
+    ADDING NEW DICTIONARIES TO TSEARCH2
+
+    To aid in the addition of new dictionaries to the tsearch2
+    module you can use an additional module in combination
+    with tsearch2. The gendict module is included in the tsearch2
+    distribution and is available from the gendict/ subdirectory.
+
+    I will not go into detail about installation and
+    instructions on how to use gendict to its fullest extent right
+    now. You can read the README.gendict ... it has all of the
+    instructions and information you will need.
+
+    BACKING UP AND RESTORING DATABASES THAT FEATURE
+    TSEARCH2
+
+    Believe it or not, this isn't as straightforward as it
+    should be, and you will have problems trying to back up and
+    restore any database which uses tsearch2 unless you take the
+    steps shown below. And before you ask, using pg_dumpall will
+    result in failure every time. These steps took a lot of trial
+    and error to get working, but the process as laid down below has
+    been used a dozen times now in live production environments so
+    it should work fine.
+
+    HOWEVER never rely on anyone else's instructions to back up
+    and restore a database system; always develop and understand
+    your own methodology, and test it numerous times before you
+    need to do it for real.
+
+    To Backup a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Backup any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        pg_dumpall -g > GLOBALobjects.sql
+
+
+    2) Backup the full database schema using pg_dump
+    
+        pg_dump -s DATABASE > DATABASEschema.sql
+
+
+    3) Backup the full database using pg_dump
+    
+        pg_dump -Fc DATABASE > DATABASEdata.tar
+
+
+    To Restore a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Create the blank database
+    
+        createdb DATABASE
+
+
+    2) Restore any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        psql DATABASE < GLOBALobjects.sql
+
+
+    3) Create the tsearch2 objects, functions and operators
+    
+        psql DATABASE < tsearch2.sql
+
+
+    4) Edit the backed up database schema and delete all SQL
+    commands which create tsearch2 related functions, operators and
+    data types, BUT NOT fields in table definitions that specify
+    tsvector types. If you're not sure what these are, they are the
+    ones listed in tsearch2.sql. Then restore the edited schema to
+    the database
+    
+        psql DATABASE < DATABASEschema.sql
+
+
+    5) Restore the data for the database
+    
+        pg_restore -N -a -d DATABASE DATABASEdata.tar
+
+
+    If you get any errors in step 4, it will most likely be
+    because you forgot to remove an object that was created in
+    tsearch2.sql. Any errors in step 5 will mean the database
+    schema was probably restored wrongly.
+  
+
+


diff --git a/contrib/tsearch2/docs/tsearch2-guide.html b/contrib/tsearch2/docs/tsearch2-guide.html

new file mode 100644 (file)

index 0000000..2529480


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-guide.html
@@ -0,0 +1,1057 @@
+
+
+
+
+tsearch2 guide
+
+
+The tsearch2 Guide
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Guide introduces the reader to the PostgreSQL tsearch2 module,
+version 2.
+More formal descriptions of the module's types and functions
+are provided in the tsearch2 Reference,
+which is a companion to this document.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+First we will examine the tsvector and tsquery types
+and how they are used to search documents;
+next, we will use them to build a simple search engine in SQL;
+and finally, we will study the internals of document conversion
+and how you might tune the internals to accommodate various searching needs.
+
+Once you have tsearch2 working with PostgreSQL,
+you should be able to run the examples here exactly as they are typed.
+
+
+Table of Contents
+
+Vectors and Queries

+A Simple Search Engine

+Ranking and Position Weights

+Casting Vectors and Queries

+Parsing and Lexing

+
+
+
+
+Vectors and Queries
+
+
+This section introduces

+the two data types upon which tsearch2 search engines are based,
+and illustrates their interaction using the simplest possible case.
+The complex examples we present later on
+are merely variations and elaborations of this basic mechanism.
+
+
+The tsearch2 module allows you to index documents by the words they contain,
+and then perform very efficient searches
+for documents that contain a given combination of words.
+Preparing your document index involves two steps:
+
+Making a list of the words each document contains.
+ You must reduce each document to a tsvector
+ which lists each word that appears in the document.
+ This process offers many options,
+ because there is no requirement
+ that you must copy words into the vector
+ exactly as they appear in the document.
+ For example,
+ many developers omit frequent and content-free stop words
+ like the to reduce the size of their index;
+ others reduce different forms of the same word
+ (forked, forking, forks)
+ to a common form (fork)
+ to make search results independent of tense and case.
+ Because words are very often stored in a modified form,
+ we use the special term lexemes
+ for the word forms we actually store in the vector.
+Creating an index of the documents by lexeme.
+ This is managed automatically by tsearch2
+ when you create a gist() index
+ on the tsvector column of a table,
+ which implements a form of the Berkeley
+ Generalized Search Tree.
+
+Once your documents are indexed,
+performing a search involves:
+
+Reducing the search terms to lexemes.
+ You must express each search you want to perform
+ as a tsquery specifying a boolean combination of lexemes.
+ Note that tsearch2 only finds exact matches
+ between the lexemes in your query and the ones in each vector —
+ even capitalization counts as a difference
+ (which is why all lexemes are usually kept lowercase).
+ So you must process search words the same way you processed document words;
+ if forking became fork in the document's tsvector,
+ then the search term forking must also become fork
+ or the search will not find the document.
+Retrieving the documents that match the query.
+ Running a SELECT ... WHERE
+ query @@ vector
+ on the table with the vector column
+ will return the documents that match your query.
+Presenting your results.
+ This final stage offers as many options
+ as turning documents into vectors.
+ You can order documents by how well they matched the search terms;
+ create a headline for each document
+ showing some of the phrases in which it uses the search terms;
+ and restrict the number of results retrieved.
+ You will of course want some way to identify each document,
+ so the user can ask for the full text of the ones he wants to read.
+
+And beyond deciding upon rules for turning documents into vectors
+and for presenting search results to users,
+you have to decide where to perform these operations —
+whether one database server
+will parse documents, perform searches, and prepare search results,
+or whether to spread the load of these operations across several machines.
+These are complicated design issues
+which we will explore later;
+in this section and the next,
+we will illustrate what can be accomplished
+using a single database server.
+
+The default tsearch2 configuration,
+which we will learn more about later,
+provides a good example of a process for reducing documents to vectors:
+
+
+=# SELECT set_curcfg('default')
+=# SELECT to_tsvector('The air smells of sea water.')
+             to_tsvector             
+-------------------------------------
+ 'air':2 'sea':5 'smell':3 'water':6
+(1 row)
+
+
+Note the complex relationship between this document and its vector.
+The vector lists only words from the document —
+spaces and punctuation have disappeared.
+Common words like the and of have been eliminated.
+The -s that makes smells a plural has been removed,
+leaving a lexeme that represents the word in its simplest form.
+And finally,
+though the vector remembers the positions in which each word appeared,
+it does not store the lexemes in that order.
+
+Keeping word positions in your vectors is optional, by the way.
+The positions are necessary for the tsearch2 ranking functions,
+which you can use to prioritize documents
+based on how often each document uses the search terms
+and whether they appear in close proximity.
+But if you do not perform ranking,
+or use your own process that ignores the word positions stored in the vector,
+then you can save space by stripping them from your vectors:
+
+
+=# SELECT strip(to_tsvector('The air smells of sea water.'))
+            strip            
+-----------------------------
+ 'air' 'sea' 'smell' 'water'
+(1 row)
+
+
+Now that we have a procedure for creating vectors,
+we can build an indexed table of vectors very simply:
+
+
+=# CREATE TABLE vectors ( vector tsvector )
+=# CREATE INDEX vector_index ON vectors USING gist(vector)
+=# INSERT INTO vectors VALUES (to_tsvector('The path forks here'))
+=# INSERT INTO vectors VALUES (to_tsvector('A crawl leads west'))
+=# INSERT INTO vectors VALUES (to_tsvector('The left fork leads northeast'))
+=# SELECT * FROM vectors
+                  vector                  
+------------------------------------------
+ 'fork':3 'path':2
+ 'lead':3 'west':4 'crawl':2
+ 'fork':3 'lead':4 'left':2 'northeast':5
+(3 rows)
+
+
+Now we can search this collection of document vectors
+using the @@ operator and a tsquery
+that specifies the combination of lexemes we are looking for.
+Note that while vectors simply list lexemes,
+queries always combine them with the operators
+‘&’ and,
+‘|’ or,
+and  ‘!’ not,
+plus parentheses for grouping.
+Some examples of the query syntax:
+
+
+ ‘find documents with the word forks in them’

+ 'forks'
+
+ ‘... with both forks and leads’

+ 'forks & leads'
+
+ ‘... with either forks or leads’

+ 'forks | leads'
+
+ ‘... with either forks or leads,
+  but without crawl’

+ '(forks|leads) & !crawl'
+
+The tsearch2 module
+provides a to_tsquery() function for creating queries
+that uses the same process as to_tsvector() uses
+to reduce words to lexemes.
+For instance,
+it will remove the -s from the plurals in the last example above:
+
+
+=# SELECT to_tsquery('(leads|forks) & !crawl')
+           to_tsquery           
+--------------------------------
+ ( 'lead' | 'fork' ) & !'crawl'
+(1 row)
+
+
+Again,
+this is critically important because the search operator @@
+only finds exact matches
+between the words in a query and the words in a vector;
+if the document vector lists the lexeme fork
+but the query looks for the plural form forks,
+the query would not match that document.
+Thanks to the symmetry between our process
+for producing vectors and queries, however,
+the above searches return correct results:
+
+
+=# SELECT * FROM vectors WHERE vector @@ to_tsquery('(leads|forks) & !crawl')
+                  vector                  
+------------------------------------------
+ 'fork':3 'path':2
+ 'fork':3 'lead':4 'left':2 'northeast':5
+(2 rows)
+
+
+You may want to try the other queries shown above,
+and perhaps invent some of your own.
+
+You should not include stop words in a query,
+since you cannot search for words you have discarded.
+If you throw out the word the when building vectors, for example,
+your index will obviously not know which documents included it.
+The to_tsquery() function will automatically detect this
+and issue a notice to alert you to this mistake:
+
+
+=# SELECT to_tsquery('the')
+NOTICE:  Query contains only stopword(s) or doesn't contain lexem(s), ignored
+ to_tsquery 
+------------
+ 
+(1 row)
+
+
+But if you ever build vectors and queries using your own routines,
+a possibility we will discuss later,
+then you will need to enforce this rule yourself.
+
+

+Now that you understand how vectors and queries work together,
+you are prepared to tackle many additional topics:
+how to distribute searching across many servers;
+how to customize the process
+by which tsearch2 turns documents and queries into lexemes,
+or use a process of your own;
+and how to sort and display search results to your users.
+But before discussing these detailed questions,
+we will build a simple search engine
+to see how easily its basic features work together.
+
+
+A Simple Search Engine
+
+

+In this section we build a simple search engine out of SQL functions
+that use the vector and query types described in the previous section.
+While this example is simpler
+than a search engine that has to interface with the outside world,
+it will illustrate the basic principles of building a search engine,
+and better prepare you for developing your own.
+
+Building a search engine involves only a few improvements
+upon the rudimentary vector searches described in the last section.
+
+Because the user wants to read documents, not vectors,
+ you must provide some way
+ for the full text of each document to be accessed —
+ either by storing the entire text of each document in the database,
+ or storing an identifier
+ like a URL, file name, or document routing number
+ that lets you fetch the document from other storage.
+You can make it easier for user interface code to refer to each document
+ by providing a unique identifier for each document,
+ perhaps with a SERIAL column.
+Search results should be ordered by relevance.
+ If you leave word positions in your vectors,
+ you can either have PostgreSQL ORDER your results
+ BY a ranking function,
+ or you can fetch the vectors yourself and perform your own sort.
+ If you choose to ignore word positions or strip them from your vectors,
+ you will have to determine relevance yourself,
+ using either the full text of the document
+ or other information about each document you may possess.
+For each document returned by a search,
+ you will usually want to display a summary called a headline
+ that shows short excerpts
+ illustrating how the document uses the query words.
+ Headlines are usually generated from the full text of the document,
+ not from position information in the tsvector,
+ since excerpts lacking stop words, punctuation, and suffixes
+ would not be comprehensible.
+ If you store the full text of each document in the database,
+ headlines can be generated very simply by a tsearch2 function.
+ If you store your documents elsewhere,
+ then you will either have to transmit each document to the database
+ every time you want to run the headline function on it,
+ or use your own headline code outside of the database.
+
+
+We can easily construct a simple search engine
+that accomplishes these goals.
+First we build a table that, for each document,
+stores a unique identifier, the full text of the document,
+and its tsvector:
+
+
+=# CREATE TABLE docs ( id SERIAL, doc TEXT, vector tsvector )
+=# CREATE INDEX docs_index ON docs USING gist(vector);
+
+
+Note that although searches will still work
+on tables where you have neglected
+to create a gist() index over your vectors,
+they will run much more slowly
+since they will have to compare the query
+against every document vector in the table.
+
+Because the table we have created
+stores each document in two different ways —
+both as text and as a vector —
+our INSERT statements must provide the document in both forms.
+While more advanced PostgreSQL programmers
+might accomplish this with a database trigger or rule,
+for this simple example we will use a small SQL function:
+
+
+=# CREATE FUNCTION insdoc(text) RETURNS void LANGUAGE sql AS

+  'INSERT INTO docs (doc, vector) VALUES ($1, to_tsvector($1));'
+
+
+Now, by calling insdoc() several times,
+we can populate our table with documents:
+
+
+=# SELECT insdoc('A low crawl over cobbles leads inward to the west.')
+=# SELECT insdoc('The canyon runs into a mass of boulders -- dead end.')
+=# SELECT insdoc('You are crawling over cobbles in a low passage.')
+=# SELECT insdoc('Cavernous passages lead east, north, and south.')
+=# SELECT insdoc('To the east a low wide crawl slants up.')
+=# SELECT insdoc('You are in the south side chamber.')
+=# SELECT insdoc('The passage here is blocked by a recent cave-in.')
+=# SELECT insdoc('You are in a splendid chamber thirty feet high.')
+
+
+Now we can build a search function.
+Its SELECT statement is based upon
+the same @@ operation illustrated in the previous section.
+But instead of returning matching vectors,
+we return for each document
+its SERIAL identifier, so the user can retrieve it later;
+a headline that illustrates its use of the search terms;
+and a ranking with which we also order the results.
+Our search operation can be coded as a single SELECT statement
+returning its own kind of table row,
+which we call a finddoc_t:
+
+
+=# CREATE TYPE finddoc_t AS (id INTEGER, headline TEXT, rank REAL)
+=# CREATE FUNCTION finddoc(text) RETURNS SETOF finddoc_t LANGUAGE sql AS '

+   SELECT id, headline(doc, q), rank(vector, q)
+     FROM docs, to_tsquery($1) AS q
+     WHERE vector @@ q ORDER BY rank(vector, q) DESC'
+
+
+This function is a rather satisfactory search engine.
+Here is one example search,
+after which the user fetches the top-ranking document itself;
+with similar commands you can try queries of your own:
+
+
+=# SELECT * FROM finddoc('passage|crawl')
+ id |                       headline                        | rank 
+----+-------------------------------------------------------+------
+  3 | <b>crawling</b> over cobbles in a low <b>passage</b>. | 0.19
+  1 | <b>crawl</b> over cobbles leads inward to the west.   |  0.1
+  4 | <b>passages</b> lead east, north, and south.          |  0.1
+  5 | <b>crawl</b> slants up.                               |  0.1
+  7 | <b>passage</b> here is blocked by a recent  cave-in.  |  0.1
+(5 rows)
+=# SELECT doc FROM docs WHERE id = 3
+                       doc                       
+-------------------------------------------------
+ You are crawling over cobbles in a low passage.
+(1 row)
+
+
+While by default the headline() function
+surrounds matching words with <b> and </b>
+in order to distinguish them from the surrounding text,
+you can provide options that change its behavior;
+consult the tsearch2 Reference for more details about
+Headline Functions.
+
+Though a search may match hundreds or thousands of documents,
+you will usually present only ten or twenty results to the user at a time.
+This can be most easily accomplished
+by limiting your query with a LIMIT
+and an OFFSET clause —
+to display results ten at a time, for example,
+you would generate your first page of results
+with LIMIT 10 OFFSET 0,
+your second page
+with LIMIT 10 OFFSET 10,
+your third page
+with LIMIT 10 OFFSET 20,
+and so forth.
+There are two problems with this approach, however.
+
+The first problem is the strain of running the query over again
+for every page of results the user views.
+For small document collections or lightly loaded servers,
+this may not be a problem;
+but the impact can be high
+when a search must repeatedly rank and sort
+the same ten thousand results
+on an already busy server.
+So instead of selecting only one page of results,
+you will probably use LIMIT and OFFSET
+to return a few dozen or few hundred results,
+which you can cache and display to the user one page at a time.
+Whether a result cache rewards your effort
+will depend principally on the behavior of your users —
+how often they even view the second page of results, for instance.
+
+The second issue solved by caching involves consistency.
+If the database is changing while the user browses their results,
+then documents might appear and disappear as they page through them.
+In some cases the user might even miss a particular result —
+perhaps the one they were looking for —
+if, say, its rank improves from 31st to 30th
+after they load results 21–30 but before they view results 31–40.
+While many databases are static or infrequently updated,
+and will not present this problem,
+users searching very dynamic document collections
+might benefit from the stable results that caches yield.
+
+

+Having seen the features of a search engine
+implemented entirely within the database,
+we will learn about some specific tsearch2 features.
+First we will look in more detail at document ranking.
+
+
+Ranking and Position Weights
+
+

+When we built our simple search engine,
+we used the rank() function to order our results.

+Here we describe tsearch2 ranking in more detail.
+
+
+There are two functions with which tsearch2 can rank search results.
+They both use the lexeme positions listed in the tsvector,
+so you cannot rank vectors
+from which these have been removed with strip().
+The rank() function existed in older versions of OpenFTS,
+and has the feature that you can assign different weights
+to words from different sections of your document.
+The rank_cd() function uses a recent technique for weighting results
+but does not allow different weights to be given
+to different sections of your document.
+
+Both ranking functions allow you to specify,
+as an optional last argument,
+whether you want their results normalized —
+whether the rank returned should be adjusted for document length.
+Specifying a last argument of 0 (zero) makes no adjustment;
+1 (one) divides the document rank
+by the logarithm of the document length;
+and 2 divides it by the plain length.
+In all of these examples we omit this optional argument,
+which is the same as specifying zero —
+we are making no adjustment for document length.
+
+The rank_cd() function uses an experimental measurement
+called cover density ranking that rewards documents
+when they make frequent use of the search terms
+that are close together in the document.
+You can read about the algorithm in more detail
+in Clarke et al.,
+ “Relevance Ranking for One to Three Term Queries.”
+An optional first argument allows you to tune their formula;
+for details
+see the section on ranking
+in the Reference.
+
+The rank() function offers more flexibility
+because it pays attention to the weights
+with which you have labelled lexeme positions.
+Currently tsearch2 supports four different weight labels:
+'D', the default weight;
+and 'A', 'B', and 'C'.
+All vectors created with to_tsvector()
+assign the weight 'D' to each position,
+which as the default is not displayed when you print a vector out.
+
+If you want positions with weights other than 'D',
+you have two options:
+either you can author a vector directly through the ::tsvector
+casting operation,
+as described in the following section,
+which lets you give each position whichever weight you want;
+or you can pass a vector through the setweight() function
+which sets all of its position weights to a single value.
+An example of the latter:
+
+
+
+=# SELECT vector FROM docs WHERE id = 3
+                 vector                 
+----------------------------------------
+ 'low':8 'cobbl':5 'crawl':3 'passag':9
+(1 row)
+=# SELECT setweight(vector, 'A') FROM docs WHERE id = 3
+                 setweight                  
+--------------------------------------------
+ 'low':8A 'cobbl':5A 'crawl':3A 'passag':9A
+(1 row)
+
+
+
+Merely changing all of the weights in a vector is not very useful,
+of course,
+since this still results in all words having the same weight.
+But if we parse different parts of a document separately,
+giving each section its own weight,
+and then concatenate the vectors of each part into a single vector,
+the result can be very useful.
+We can construct a simple example
+in which document titles are given greater weight
+than text in the body of the document:
+
+
+
+=# CREATE TABLE tdocs ( id SERIAL, title TEXT, doc TEXT, vector tsvector )
+=# CREATE INDEX tdocs_index ON tdocs USING gist(vector);
+=# CREATE FUNCTION instdoc(text, text) RETURNS void LANGUAGE sql AS

+  'INSERT INTO tdocs (title, doc, vector)
+   VALUES ($1, $2, setweight(to_tsvector($1), ''A'') || to_tsvector($2));'
+
+
+
+Now words from a document title will be weighted differently
+than those in the main text
+if we provide the title and body as separate arguments:
+
+
+
+=# SELECT instdoc('Spendid Chamber',

+ 'The walls are frozen rivers of orange stone.')
+ instdoc 
+---------
+ 
+(1 row)
+=# SELECT vector FROM tdocs
+                                    vector                                    
+------------------------------------------------------------------------------
+ 'wall':4 'orang':9 'river':7 'stone':10 'frozen':6 'chamber':2A 'spendid':1A
+(1 row)
+
+
+
+Note that although the necessity is unusual,
+you can constrain search terms
+to only match words from certain sections
+by following them with a colon
+and a list of the sections in which the word can occur;
+by default this list is 'ABCD'
+so that search terms match words from all sections.
+For example,
+here we search for a word both generally,
+and then looking only for specific weights:
+
+
+
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:A')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:D')
+ title | doc 
+-------+-----
+(0 rows)
+
+
+
+
+
+

+Our examples so far use tsearch2 to parse our documents into vectors.
+When your application needs absolute control over vector content,
+you will want to use direct type casting,
+which is described in the next section.
+
+
+Casting Vectors and Queries
+
+

+While tsearch2 has powerful and flexible ways
+to process documents and turn them into document vectors,
+you will sometimes want to parse documents on your own
+and place the results directly in vectors.
+Here we show you how.
+
+
+In the preceding examples,
+we used the to_tsvector() function
+when we needed a document's text reduced to a document vector.
+We saw that the function stripped whitespace and punctuation,
+eliminated common words,
+and altered suffixes to reduce words to a common form.
+While these operations are often desirable,
+and while in the sections below
+we will gain precise control over this process,
+there are occasions on which
+you want to avoid the changes that to_tsvector() makes to text
+and specify explicitly the words that you want in your vectors.
+Or you may want to create queries directly
+rather than through to_tsquery().
+
+For example,
+you may have already developed your own routine
+for reducing your documents to searchable lexemes,
+and do not want your carefully generated terms altered
+by passing them through to_tsvector().
+Or you might be developing and debugging parsing routines of your own
+that you are not ready to load into the database.
+In either case,
+you will find that direct insertion is easily accomplished
+if you simply follow some simple rules.
+
+Vectors are created directly
+when you cast a string of whitespace-separated lexemes
+to the tsvector type:
+
+
+
+=# select 'the only exit is the way you came in'::tsvector
+                     tsvector                     
+--------------------------------------------------
+ 'in' 'is' 'the' 'way' 'you' 'came' 'exit' 'only'
+(1 row)
+
+
+
+Notice that the conversion interpreted the string
+simply as a list of lexemes to be included in the vector.
+Their order was lost,
+as was the number of times each lexeme appeared.
+You must keep in mind that directly creating vectors with casting
+is not an alternate means of parsing;
+it is a way of directly entering lexemes into a vector without parsing.
+
+Queries can also be created through casting,
+if you separate lexemes with boolean operators
+rather than with whitespace.
+When creating your own vectors and queries,
+remember that the search operator @@
+finds only exact matches between query lexemes and vector lexemes
+—
+if they are not exactly the same string,
+they will not be considered a match.
+
+To include lexeme positions in your vector,
+write the positions exactly the way tsearch2 displays them
+when it prints vectors:
+by following each lexeme with a colon
+and a comma-separated list of integer positions.
+If you list a lexeme more than once,
+then all the positions listed for it are combined into a single list.
+For example,
+here are two ways of writing the same vector,
+depending on whether you mention ‘the’ twice
+or combine its positions into a list yourself:
+
+
+
+=# select 'the:1 only:2 exit:3 is:4 the:5 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+=# select 'the:1,5 only:2 exit:3 is:4 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+
+
+
+Things can get slightly tricky
+if you want to include apostrophes, backslashes, or spaces
+inside your lexemes
+(wanting to include either of the latter would be unusual,
+but they can be included if you follow the rules).
+The main problem is that the apostrophe and backslash
+are important both to PostgreSQL when it is interpreting a string,
+and to the tsvector conversion function.
+You may want to review section
+1.1.2.1,
+“String Constants”
+in the PostgreSQL documentation before proceeding.
+
+When you cast strings directly into vectors:
+
+The string is interpreted as a whitespace-separated list of lexemes,
+ any of which can be suffixed with a colon and a list of positions.
+A lexeme can be quoted by preceding it with an apostrophe,
+ in which case it runs until the next apostrophe;
+ otherwise a lexeme ends with the first whitespace or colon encountered.
+Any character preceded by a backslash,
+ including whitespace, the apostrophe, the colon, and the backslash itself,
+ loses its normal meaning and is treated as a letter.
+ Backslashes are effective
+ both inside and outside of apostrophe-quoted lexemes.
+A lexeme can be suffixed with a list of positions
+ by appending a colon and a comma-separated list of integers,
+ each of which can itself be followed by a letter
+ to designate a position weight
+ (position weights are described below).
+
+
+Here are some example strings,
+showing the lexeme you want to insert
+together with the string that the ::tsvector operator
+needs to see,
+and how you would type that string at the PostgreSQL prompt:
+
+
+
+For the lexeme...
+you need the string...
+which you can type as:
+
+nugget
+nugget
+'nugget'
+
+won't
+won't
+'won''t'
+
+pinin'
+pinin'
+'pinin'''
+
+'bout
+\'bout
+'\\''bout'
+
+white mist
+white\ mist
+'white\\ mist'
+
+or:
+'white mist'
+'''white mist'''
+
+won't budge
+won\'t\ budge
+'won\\''t\\ budge'
+
+or:
+'won\'t budge'
+'''won\\''t budge'''
+
+back\slashed
+back\\slashed
+'back\\\\slashed'
+
+
+Remember to use the quoted quoting shown at the right
+only when typing in strings as part of a PostgreSQL query.
+If you are providing strings through a library
+that automatically quotes them
+or provides them in binary form to PostgreSQL,
+then you can use the strings in the middle instead —
+suitably quoted in the language you are using, of course.
+
+Position weights are described below
+and can be written exactly as they will be displayed
+when you select a weighted vector:
+
+
+=# select 'weighty:1,3A trivial:2B,4'::tsvector
+           tsvector            
+-------------------------------
+ 'trivial':2B,4 'weighty':1,3A
+(1 row)
+
+
+
+Note that if you are composing SQL queries
+in a scripting language like Perl or Python,
+that itself considers quotes and backslashes special,
+then you may have another quoting layer to deal with
+on top of the two layers already shown above.
+In such cases you may want to write a function
+that performs the necessary quoting for you.
+
+

+Having seen how to create vectors of your own,
+it is time to learn how the native tsearch2 parser
+reduces documents to vectors.
+
+
+Parsing and Lexing
+
+

+The previous section
+described how you can bypass the parser provided by tsearch2
+and populate your table of documents
+with vectors of your own devising.
+But for those interested in the native tsearch2 facilities,
+we present here an overview of how it goes about
+reducing documents to vectors.
+
+
+The to_tsvector() function reduces documents to vectors
+in two stages.
+First, a parser breaks the input document
+into short sequences of text called tokens.
+Each token is usually a word, space, or piece of punctuation,
+though some parsers return larger and more exotic items
+like HTML tags as single tokens.
+Each token returned by the parser
+is either discarded
+or passed to a dictionary that converts it into a lexeme.
+The resulting lexemes are collected into a vector and returned.
+
+The choice of which parser and dictionaries to_tsvector() should use
+is controlled by your choice of configuration.
+The tsearch2 module comes with several configurations,
+and you can define more of your own;
+in fact the creation of a new configuration is illustrated below,
+in the section on position weights.
+
+To learn about parsing in more detail,
+we will study this example:
+
+
+=# select to_tsvector('default',

+     'The walls extend upward for well over 100 feet.')
+                       to_tsvector                        
+----------------------------------------------------------
+ '100':8 'feet':9 'wall':2 'well':6 'extend':3 'upward':4
+(1 row)
+
+
+Unlike the to_tsvector() calls used in the above examples,
+this one specifies the 'default' configuration explicitly.
+When we called to_tsvector() in earlier examples
+with only one argument,
+it used the current configuration,
+which is chosen automatically based on your LOCALE
+if that locale is mentioned in the pg_ts_cfg table
+(which is shown under the first bullet in the description below).
+If your locale is not listed in the table,
+your attempts to use the current configuration will return:
+
+
+ERROR:  Can't find tsearch2 config by locale
+
+
+You can always change the current configuration manually
+by calling the set_curcfg() function
+described in the section on
+Configurations
+in the Reference.
+
+Each configuration serves as an index into two different tables:
+in pg_ts_cfg it determines
+which parser will break our text into tokens,
+and in pg_ts_cfgmap
+it directs each token to a dictionary for processing.
+The steps in detail are:
+
+
+
+First, our text is parsed,
+using the parser listed for our configuration in the pg_ts_cfg table.
+We are using the 'default' configuration,
+so the table tells us to use the 'default' parser:
+
+
+=# SELECT * FROM pg_ts_cfg WHERE ts_name = 'default'
+ ts_name | prs_name | locale 
+---------+----------+--------
+ default | default  | C
+(1 row)
+
+
+So our text will be parsed as though we had called:
+
+
+=# select * from parse('default',

+     'The walls extend upward for well over 100 feet.')
+
+
+This breaks the text into a list of tokens
+which are each labelled with an integer type:
+
+The₁♦₁₂
+walls₁♦₁₂
+extend₁♦₁₂
+upward₁♦₁₂
+for₁♦₁₂
+well₁♦₁₂
+over₁♦₁₂
+100₂₂♦₁₂
+feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')

+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and

+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+}

diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html

new file mode 100644 (file)

index 0000000..df0faa4


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-ref.html
@@ -0,0 +1,448 @@
+
+
+
+
+tsearch2 reference
+
+
+The tsearch2 Reference
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Reference documents the user types and functions
+of the tsearch2 module for PostgreSQL.
+An introduction to the module is provided
+by the tsearch2 Guide,
+a companion document to this one.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+Vectors and Queries
+
+Vectors and queries both store lexemes,
+but for different purposes.
+A tsvector stores the lexemes
+of the words that are parsed out of a document,
+and can also remember the position of each word.
+A tsquery specifies a boolean condition among lexemes.
+
+Any of the following functions with a configuration argument
+can use either an integer id or textual ts_name
+to select a configuration;
+if the option is omitted, then the current configuration is used.
+For more information on the current configuration,
+read the next section on Configurations.
+
+Vector Operations
+
+
+
+ to_tsvector( [configuration,]

+ document TEXT) RETURNS tsvector
+
+ Parses a document into tokens,
+ reduces the tokens to lexemes,
+ and returns a tsvector which lists the lexemes
+ together with their positions in the document.
+ For the best description of this process,
+ see the section on Parsing and Lexing
+ in the accompanying tsearch2 Guide.
+
+ strip(vector tsvector) RETURNS tsvector
+
+ Return a vector which lists the same lexemes
+ as the given vector,
+ but which lacks any information
+ about where in the document each lexeme appeared.
+ While the returned vector is thus useless for relevance ranking,
+ it will usually be much smaller.
+
+ setweight(vector tsvector, letter) RETURNS tsvector
+
+ This function returns a copy of the input vector
+ in which every location has been labelled
+ with either the letter
+ 'A', 'B', or 'C',
+ or the default label 'D'
+ (which is the default with which new vectors are created,
+ and as such is usually not displayed).
+ These labels are retained when vectors are concatenated,
+ allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+
+ vector1 || vector2
+
+ concat(vector1 tsvector, vector2 tsvector)

+ RETURNS tsvector
+
+ Returns a vector which combines the lexemes and position information
+ in the two vectors given as arguments.
+ Position weight labels (described in the previous paragraph)
+ are retained intact during the concatenation.
+ This has at least two uses.
+ First,
+ if some sections of your document
+ need to be parsed with different configurations than others,
+ you can parse them separately
+ and concatenate the resulting vectors into one.
+ Second,
+ you can weight words from some sections of your document
+ more heavily than those from others by:
+ parsing the sections into separate vectors;
+ assigning the vectors different position labels
+ with the setweight() function;
+ concatenating them into a single vector;
+ and then providing a weights argument
+ to the rank() function
+ that assigns different weights to positions with different labels.
+
+ tsvector_size(vector tsvector) RETURNS INT4
+
+ Returns the number of lexemes stored in the vector.
+
+ text::tsvector RETURNS tsvector
+
+ Directly casting text to a tsvector
+ allows you to directly inject lexemes into a vector,
+ with whatever positions and position weights you choose to specify.
+ The text should be formatted
+ like the vector would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Query Operations
+
+
+
+ to_tsquery( [configuration,]

+ querytext text) RETURNS tsquery
+
+ Parses a query,
+ which should be single words separated by the boolean operators
+ “&” and,
+ “|” or,
+ and “!” not,
+ which can be grouped using parentheses.
+ Each word is reduced to a lexeme using the current
+ or specified configuration.
+
+
+ querytree(query tsquery) RETURNS text
+
+ This might return a textual representation of the given query.
+
+ text::tsquery RETURNS tsquery
+
+ Directly casting text to a tsquery
+ allows you to directly inject lexemes into a query,
+ with whatever positions and position weight flags you choose to specify.
+ The text should be formatted
+ like the query would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Configurations
+
+A configuration specifies all of the equipment necessary
+to transform a document into a tsvector:
+the parser that breaks its text into tokens,
+and the dictionaries which then transform each token into a lexeme.
+Every call to to_tsvector() (described above)
+uses a configuration to perform its processing.
+Three configurations come with tsearch2:
+
+
+default — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the simple dictionary for all others.
+default_russian — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the ru_stem Russian Snowball dictionary for all others.
+simple — Processes both words and numbers
+ with the simple dictionary,
+ which neither discards any stop words nor alters them.
+
+
+The tsearch2 module initially chooses your current configuration
+by looking for your current locale in the locale field
+of the pg_ts_cfg table described below.
+You can manipulate the current configuration yourself with these functions:
+
+
+
+ set_curcfg( id INT | ts_name TEXT

+  ) RETURNS VOID
+
+ Set the current configuration used by to_tsvector
+ and to_tsquery.
+
+ show_curcfg() RETURNS INT4
+
+ Returns the integer id of the current configuration.
+
+
+
+Each configuration is defined by a record in the pg_ts_cfg table:
+
+create table pg_ts_cfg (
+   id      int not  null primary key,
+   ts_name     text not null,
+   prs_name    text not null,
+   locale      text
+);
+
+The id and ts_name are unique values
+which identify the configuration;
+the prs_name specifies which parser the configuration uses.
+Once this parser has split document text into tokens,
+the type of each resulting token —
+or, more specifically, the type's lex_alias
+as specified in the parser's lexem_type() table —
+is searched for together with the configuration's ts_name
+in the pg_ts_cfgmap table:
+
+create table pg_ts_cfgmap (
+   ts_name     text not null,
+   lex_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,lex_alias)
+);
+
+Those tokens whose types are not listed are discarded.
+The remaining tokens are assigned integer positions,
+starting with 1 for the first token in the document,
+and turned into lexemes with the help of the dictionaries
+whose names are given in the dict_name array for their type.
+These dictionaries are tried in order,
+stopping either with the first one to return a lexeme for the token,
+or discarding the token if no dictionary returns a lexeme for it.
+
+Parsers
+
+Each parser is defined by a record in the pg_ts_parser table:
+
+create table pg_ts_parser (
+   prs_id      int not null primary key,
+   prs_name    text not null,
+   prs_start   oid not null,
+   prs_getlexem    oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+);
+
+The prs_id and prs_name uniquely identify the parser,
+while prs_comment usually describes its name and version
+for the reference of users.
+The other items identify the low-level functions
+which make the parser operate,
+and are only of interest to someone writing a parser of their own.
+
+The tsearch2 module comes with one parser named default
+which is suitable for parsing most plain text and HTML documents.
+
+Each parser argument below
+must designate a parser with either an integer prs_id
+or a textual prs_name;
+the current parser is used when this argument is omitted.
+
+
+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID
+
+ Selects a current parser
+ which will be used when any of the following functions
+ are called without a parser as an argument.
+
+ CREATE FUNCTION lexem_type(

+  [ parser ]
+  ) RETURNS SETOF lexemtype
+
+ Returns a table which defines and describes
+ each kind of token the parser may produce as output.
+ For each token type the table gives the lexid
+ with which the parser will label each token of that type,
+ the alias which names the token type,
+ and a short description descr for the user to read.
+
+ CREATE FUNCTION parse(

+  [ parser, ] document TEXT
+  ) RETURNS SETOF lexemtype
+
+ Parses the given document and returns a series of records,
+ one for each token produced by parsing.
+ Each token includes a lexid giving its type
+ and a lexem which gives its content.
+
+
+Dictionaries
+
+Dictionaries take textual tokens as input,
+usually those produced by a parser,
+and return lexemes which are usually some reduced form of the token.
+Among the dictionaries which come installed with tsearch2 are:
+
+
+simple simply folds uppercase letters to lowercase
+ before returning the word.
+en_stem runs an English Snowball stemmer on each word
+ that attempts to reduce the various forms of a verb or noun
+ to a single recognizable form.
+ru_stem runs a Russian Snowball stemmer on each word.
+
+
+Each dictionary is defined by an entry in the pg_ts_dict table:
+
+CREATE TABLE pg_ts_dict (
+   dict_id     int not null primary key,
+   dict_name   text not null,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lemmatize  oid not null,
+   dict_comment    text
+);
+
+The dict_id and dict_name
+serve as unique identifiers for the dictionary.
+The meaning of the dict_initoption varies among dictionaries,
+but for the built-in Snowball dictionaries
+it specifies a file from which stop words should be read.
+The dict_comment is a human-readable description of the dictionary.
+The other fields are internal function identifiers
+useful only to developers trying to implement their own dictionaries.
+
+The argument named dictionary
+in each of the following functions
+should be either an integer dict_id or a textual dict_name
+identifying which dictionary should be used for the operation;
+if omitted then the current dictionary is used.
+
+
+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID
+
+ Selects a current dictionary for use by functions
+ that do not select a dictionary explicitly.
+
+ CREATE FUNCTION lexize(

+ [ dictionary, ] word text)
+ RETURNS TEXT[]
+
+ Reduces a single word to a lexeme.
+ Note that lexemes are arrays of zero or more strings,
+ since in some languages there might be several base words
+ from which an inflected form could arise.
+
+
+Ranking
+
+Ranking attempts to measure how relevant documents are to particular queries
+by inspecting the number of times each search word appears in the document,
+and whether different search terms occur near each other.
+Note that this information is only available in unstripped vectors —
+ranking functions will only return a useful result
+for a tsvector which still has position information!
+
+Both of these ranking functions
+take an integer normalization option
+that specifies whether a document's length should impact its rank.
+This is often desirable,
+since a hundred-word document with five instances of a search word
+is probably more relevant than a thousand-word document with five instances.
+The option can have the values:
+
+
+0 (the default) ignores document length.
+1 divides the rank by the logarithm of the length.
+2 divides the rank by the length itself.
+
+
+The two ranking functions currently available are:
+
+
+
+ CREATE FUNCTION rank(

+  [ weights float4[], ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS,
+ and offers the ability to weight word instances more heavily
+ depending on how you have classified them.
+ The weights specify how heavily to weight each category of word:
+ 
+ {D-weight, C-weight, B-weight, A-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(

+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or less.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+


diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b


--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | [email protected]
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | [email protected]
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 |  
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 |  
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 |  
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 |  
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+


diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496


--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+# Makefile template for a dictionary module produced by gendict.
+# The CFG_* tokens below are placeholders rather than literal values;
+# presumably gendict's config.sh substitutes them - TODO confirm.
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+PG_CPPFLAGS =
+# Links ../tsearch2/libtsearch2.a from the sibling tsearch2 directory,
+# so contrib/tsearch2 has to be compiled before this module is built.
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+
+include $(top_srcdir)/contrib/contrib-global.mk


diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7


--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility aims to help people create dictionaries for the contrib/tsearch2
+module. In particular, it has built-in support for Snowball stemmers.
+
+The programming API for tsearch2 dictionaries is described in the tsearch2
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument to the -p option must be *the same* as the name of
+      the stemming function in stem.c (without the _stem suffix).
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample Portuguese words with their stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+   Note that the dictionary and the corresponding entry in the tsearch
+   configuration are now installed, and you may modify them using
+   plain SQL commands, for example, to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercase input word and remove it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please, check Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for additional information about "Gendict tutorial" and dictionaries.
\ No newline at end of file


diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542


--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRL/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+# Option state, filled in by the getopts loop below:
+#   dictname - dictionary name (-n)
+#   stemmode - "yes" when a Snowball wrapper is requested (-s)
+#   verbose  - "yes" to print progress messages (-v)
+#   cfile    - extra C source files (-c)
+#   hfile    - extra header files (-h)
+#   dir      - name of the target directory (-d)
+#   hasinit  - "yes" when the dictionary gets an init method (-i)
+#   comment  - dictionary comment (-C)
+#   prefix   - prefix of the Snowball functions (-p)
+dictname=
+stemmode=no
+verbose=no
+cfile=
+hfile=
+dir= 
+hasinit=no
+comment=
+prefix=
+
+# Parse the command line; an unrecognized option calls usage.
+while getopts n:c:C:h:d:p:vis opt
+do
+   case "$opt" in
+       v) verbose=yes;;
+       s) stemmode=yes;;
+       i) hasinit=yes;;
+       n) dictname="$OPTARG";;
+       c) cfile="$OPTARG";;
+       h) hfile="$OPTARG";;
+       d) dir="$OPTARG";;
+       C) comment="$OPTARG";;
+       p) prefix="$OPTARG";;
+       \?) usage;;
+   esac
+done
+
+[ ${#dictname} -eq 0 ] && usage
+
+dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'`
+
+if [ $stemmode = "yes" ] ; then 
+   [ ${#prefix} -eq 0 ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi 
+
+[ ${#dir}   -eq 0 ] && dir="dict_$dictname"
+
+if [ ${#comment} -eq 0 ]; then
+   comment=null
+else
+   comment="'$comment'"
+fi
+
+ofile=
+for f in $cfile
+do
+   f=` echo $f | sed 's#c$#o#'`
+   ofile="$ofile $f"
+done
+
+if [ $stemmode = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+if [ $verbose = "yes" ]; then
+   echo Dictname: "'"$dictname"'"
+   echo Snowball stemmer: $stemmode
+   echo Has init method: $hasinit
+   [ $stemmode = "yes" ] && echo Function prefix: $prefix 
+   echo Source files: $cfile
+   echo Header files: $hfile
+   echo Object files: $ofile
+   echo Comment: $comment
+   echo Directory: ../../$dir
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build directory...  '
+if [ ! -d ../../$dir ]; then
+   if ! mkdir ../../$dir ; then 
+       echo "Can't create directory ../../$dir"
+       exit 1
+   fi 
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+else
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+fi
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+   [ $verbose = "yes" ] && echo -n 'Copy source and header files...  '
+   if [ ${#cfile} -ne 0 ] ; then
+       if ! cp $cfile ../../$dir ; then 
+           echo "Cant cp all or one of files: $cfile"
+           exit 1
+       fi
+   fi
+   if [ ${#hfile} -ne 0 ] ; then 
+       if ! cp $hfile ../../$dir ; then 
+               echo "Cant cp all or one of files: $hfile"
+           exit 1
+       fi
+   fi
+   [ $verbose = "yes" ] && echo ok
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build sub-include header...  '
+echo -n > ../../$dir/subinclude.h 
+for i in $hfile
+do
+   echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+if  [ $stemmode = "yes" ] ; then 
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   [ $verbose = "yes" ] && echo -n 'Build dictinonary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else 
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi 
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n "Build README.$dictname...  "
+if  [ $stemmode = "yes" ] ; then
+   echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname
+else
+   echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname
+fi
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+


diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/*
+ * Per-dictionary state built by the init method of the generated
+ * Snowball dictionary.
+ */
+typedef struct {
+   struct SN_env *z;               /* Snowball environment from CFG_PREFIX_create_env() */
+   StopList    stoplist;           /* stop words read from the init argument, if any */
+   int (*stem)(struct SN_env * z); /* stemming function, set to CFG_PREFIX_stem */
+} DictSnowball;
+
+
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * Init method of the generated Snowball dictionary.
+ *
+ * Argument 0 (text, may be NULL) is handed to readstoplist() to load the
+ * stop word list.  Returns a pointer to a malloc'ed DictSnowball holding
+ * the stop list, a fresh Snowball environment and the stem function.
+ * Raises ERROR when memory for the state or the environment cannot be
+ * obtained.
+ */
+Datum 
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   /* stop words are compared after lower-casing */
+   d->stoplist.wordop=lowerstr;
+       
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       /*
+        * elog(ERROR) does not return and d was obtained with malloc(),
+        * so everything allocated so far has to be released by hand.
+        */
+       freestoplist(&(d->stoplist));
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+


diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+HASINIT /* Per-dictionary state of the template dictionary: only a stop word list. */
+HASINIT typedef struct {
+HASINIT    StopList    stoplist;
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT /*
+HASINIT  * Init method of the template dictionary.
+HASINIT  *
+HASINIT  * Argument 0 (text, may be NULL) is handed to readstoplist() to
+HASINIT  * load the stop word list.  Returns a pointer to a malloc'ed,
+HASINIT  * zero-filled DictExample; raises ERROR if malloc() fails.
+HASINIT  */
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT 
+HASINIT    if ( !d )
+HASINIT        elog(ERROR, "No memory");
+HASINIT    memset(d,0,sizeof(DictExample));
+HASINIT 
+HASINIT    /* stop words are compared after lower-casing */
+HASINIT    d->stoplist.wordop=lowerstr;
+HASINIT    
+HASINIT    /* Your INIT code */
+HASINIT    
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+HASINIT        text       *in = PG_GETARG_TEXT_P(0);
+HASINIT        readstoplist(in, &(d->stoplist));
+HASINIT        sortstoplist(&(d->stoplist));
+HASINIT        PG_FREE_IF_COPY(in, 0);
+HASINIT    }
+HASINIT 
+HASINIT    PG_RETURN_POINTER(d);
+HASINIT }
+
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+/*
+ * Lexize method of the template dictionary.
+ *
+ * Arguments: 0 - dictionary state (only read in the variant with an
+ * init method), 1 - pointer to the word, 2 - length of the word.
+ * Returns a palloc'ed, NULL-terminated array of at most one string: a
+ * copy of the word.  In the variant with an init method an empty word
+ * or a stop word yields an empty array instead.
+ */
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+   char    **res=palloc(sizeof(char*)*2);
+
+   /* Your dictionary (lexize) code goes here */
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0]=NULL;
+HASINIT    } else 
+       res[0]=txt;
+   res[1]=NULL;
+
+   PG_RETURN_POINTER(res);
+}


diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842


--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+-- Template of the installation script for a generated dictionary.
+-- Lines starting with a marker word are kept (marker removed) or
+-- blanked out by config.sh, depending on the kind of dictionary built.
+SET search_path = public;
+BEGIN;
+
+-- init function: only for dictionaries with an init method
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+-- own lexize function: only for non-Snowball dictionaries
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+-- register the dictionary: name, init function (or null), null,
+-- lexize function, comment
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;


diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/* Key of entry number `pos` in the GiST entry vector `vec`, as a GISTTYPE pointer. */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/* Number of set bits in the byte `val`: the sum of its eight single bits. */
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/*
+ * Input function of the index key type: not supported, always raises
+ * ERROR.  The return statement is never reached.
+ */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * Output function of the index key type: not supported, always raises
+ * ERROR.  The return statement is never reached.
+ */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * qsort() comparator: orders two int4 values ascending.
+ */
+static int
+compareint(const void *a, const void *b)
+{
+   int4        lhs = *((int4 *) a);
+   int4        rhs = *((int4 *) b);
+
+   if (lhs > rhs)
+       return 1;
+   if (lhs < rhs)
+       return -1;
+   return 0;
+}
+
+/*
+ * Sort the array `a` of `l` elements in place and squeeze out duplicate
+ * values.
+ *
+ * Returns the number of distinct values, which afterwards occupy the
+ * front of the array in ascending order.  Arrays with fewer than two
+ * elements are returned untouched; in particular an empty array yields
+ * 0 (the sort-and-compact code below would otherwise report one
+ * element for it).
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+   int4       *ptr,
+              *res;
+
+   if (l <= 1)
+       return l;
+
+   ptr = res = a;
+
+   qsort((void *) a, l, sizeof(int4), compareint);
+
+   /* res marks the last unique value kept so far, ptr scans ahead */
+   while (ptr - a < l)
+       if (*ptr != *res)
+           *(++res) = *ptr++;
+       else
+           ptr++;
+   return res + 1 - a;
+}
+
+/*
+ * Build a signature from an array key: clear `sign`, then set the bit
+ * selected by HASH() for every value stored in `a`.
+ */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+   int4       *values = GETARR(a);
+   int4        nvalues = ARRNELEM(a);
+   int4        idx = 0;
+
+   MemSet((void *) sign, 0, sizeof(BITVEC));
+   while (idx < nvalues)
+   {
+       HASH(sign, values[idx]);
+       idx++;
+   }
+}
+
+/*
+ * GiST compress method.
+ *
+ * Leaf entries (a tsvector) are converted into an index key: a sorted,
+ * duplicate-free array of CRC32 values of the lexemes, or, when that
+ * array is longer than TOAST_INDEX_TARGET bytes, a signature built from
+ * the array.  A non-leaf signature key whose bytes are all 0xff is
+ * replaced by the compact ALLISTRUE form.  Every other entry is
+ * returned unchanged.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       /* array key with room for one hash per lexeme */
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       /* sort the hashes and drop duplicates */
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* release the detoasted copy, if one was made */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /* keep the entry as it is unless every signature byte is 0xff */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       /* all bits are set: store the header-only ALLISTRUE key instead */
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * GiST decompress method: detoast the key.  When detoasting produced a
+ * new copy, a freshly allocated entry pointing at that copy is returned;
+ * otherwise the caller's entry is passed back unchanged.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *in = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *plain = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(in->key));
+   GISTENTRY  *out;
+
+   /* nothing was detoasted: the original entry is good as it is */
+   if (plain == (GISTTYPE *) DatumGetPointer(in->key))
+       PG_RETURN_POINTER(in);
+
+   out = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+   gistentryinit(*out, PointerGetDatum(plain),
+                 in->rel, in->page,
+                 in->offset, plain->len, FALSE);
+
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Half-open range [arrb, arre) of a sorted int4 array, passed to
+ * checkcondition_arr() as its opaque check value.
+ */
+typedef struct
+{
+   int4       *arrb;
+   int4       *arre;
+}  CHKVAL;
+
+/*
+ * Binary search: does the sorted array described by `checkval` (a
+ * CHKVAL) contain the value val->val ?
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+   CHKVAL     *range = (CHKVAL *) checkval;
+   int4       *lo = range->arrb;
+   int4       *hi = range->arre;
+
+   /* the value, if present, always lies inside [lo, hi) */
+   while (lo < hi)
+   {
+       int4       *mid = lo + (hi - lo) / 2;
+
+       if (*mid == val->val)
+           return (true);
+
+       if (*mid < val->val)
+           lo = mid + 1;
+       else
+           hi = mid;
+   }
+
+   return (false);
+}
+
+/*
+ * Signature variant of the check: `checkval` is a signature, the result
+ * is the bit selected by HASHVAL(val->val).
+ */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+   return GETBIT(checkval, HASHVAL(val->val));
+}
+
+/*
+ * GiST consistent method: can the entry (argument 0) match the query
+ * (argument 1) ?
+ *
+ * An empty query never matches.  An ALLISTRUE signature always matches.
+ * Any other signature is evaluated with checkcondition_bit, an array
+ * key with checkcondition_arr over its sorted values.
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(
+                               ((GISTENTRY *) PG_GETARG_POINTER(0))->key
+   );
+
+   if (!query->size)
+       PG_RETURN_BOOL(false);
+
+   if (ISSIGNKEY(key))
+   {
+       if (ISALLTRUE(key))
+           PG_RETURN_BOOL(true);
+
+       /* NOTE(review): third argument presumably disables evaluation of
+        * NOT nodes for the lossy signature - confirm against TS_execute */
+       PG_RETURN_BOOL(TS_execute(
+                              GETQUERY(query),
+                              (void *) GETSIGN(key), false,
+                              checkcondition_bit
+                              ));
+   }
+   else
+   {                           /* only leaf pages */
+       CHKVAL      chkval;
+
+       chkval.arrb = GETARR(key);
+       chkval.arre = chkval.arrb + ARRNELEM(key);
+       PG_RETURN_BOOL(TS_execute(
+                              GETQUERY(query),
+                              (void *) &chkval, true,
+                              checkcondition_arr
+                              ));
+   }
+}
+
+/*
+ * Merge the key `add` into the signature `sbase`.
+ *
+ * Returns 1 when `add` is an ALLISTRUE key (sbase is left untouched in
+ * that case), 0 otherwise.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+   int4        i;
+
+   if (!ISSIGNKEY(add))
+   {
+       /* array key: hash every value into the signature */
+       int4       *values = GETARR(add);
+
+       for (i = 0; i < ARRNELEM(add); i++)
+           HASH(sbase, values[i]);
+       return 0;
+   }
+
+   if (ISALLTRUE(add))
+       return 1;
+   else
+   {
+       /* plain signature: OR it in byte by byte */
+       BITVECP     src = GETSIGN(add);
+
+       LOOPBYTE(
+                sbase[i] |= src[i];
+       );
+   }
+   return 0;
+}
+
+
+/*
+ * GiST union method: build one signature key covering all entries of
+ * the entry vector (argument 0).  The size of the result is stored
+ * through argument 1.  If any entry is ALLISTRUE the result is the
+ * header-only ALLISTRUE key.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   int        *size = (int *) PG_GETARG_POINTER(1);
+   BITVEC      base;
+   int4        len = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+   int4        i;
+   int4        flag = 0;
+   GISTTYPE   *result;
+
+   MemSet((void *) base, 0, sizeof(BITVEC));
+   for (i = 0; i < len; i++)
+   {
+       /* unionkey() reports 1 for an ALLISTRUE entry: nothing more to add */
+       if (unionkey(base, GETENTRY(entryvec, i)))
+       {
+           flag = ALLISTRUE;
+           break;
+       }
+   }
+
+   flag |= SIGNKEY;
+   len = CALCGTSIZE(flag, 0);
+   result = (GISTTYPE *) palloc(len);
+   *size = result->len = len;
+   result->flag = flag;
+   /* an ALLISTRUE key carries no signature bytes */
+   if (!ISALLTRUE(result))
+       memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * GiST same method: store in *result (argument 2) whether the keys a
+ * and b are equal, and return that pointer.
+ *
+ * The kind of comparison is chosen from a alone: if a is a signature
+ * key, b is taken to be one as well; otherwise both are compared as
+ * arrays.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+   GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+   GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+   bool       *result = (bool *) PG_GETARG_POINTER(2);
+   int4        i;
+
+   if (!ISSIGNKEY(a))
+   {
+       /* array keys: same length and same values in the same order */
+       int4        lena = ARRNELEM(a),
+                   lenb = ARRNELEM(b);
+       int4       *ptra = GETARR(a),
+                  *ptrb = GETARR(b);
+
+       *result = (lena == lenb) ? true : false;
+       for (i = 0; *result && i < lena; i++)
+           if (ptra[i] != ptrb[i])
+               *result = false;
+   }
+   else if (ISALLTRUE(a) || ISALLTRUE(b))
+   {
+       /* equal only when both keys are ALLISTRUE */
+       *result = (ISALLTRUE(a) && ISALLTRUE(b)) ? true : false;
+   }
+   else
+   {
+       /* two plain signatures: compare them byte by byte */
+       BITVECP     sa = GETSIGN(a),
+                   sb = GETSIGN(b);
+
+       *result = true;
+       LOOPBYTE(
+                if (sa[i] != sb[i])
+                {
+           *result = false;
+           break;
+       }
+       );
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * Count the bits set in a signature.
+ */
+static int4
+sizebitvec(BITVECP sign)
+{
+   int4        i;
+   int4        nbits = 0;
+
+   LOOPBYTE(
+       nbits += SUMBIT(sign[i]);
+   );
+   return nbits;
+}
+
+/*
+ * Hamming distance between two signatures: the number of bit positions
+ * in which they differ.
+ */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+   int i;
+   int ndiff = 0;
+
+   LOOPBIT(
+       ndiff += ( GETBIT(a,i) != GETBIT(b,i) ) ? 1 : 0;
+   );
+   return ndiff;
+}
+
+/*
+ * Hamming distance between two signature keys.  An ALLISTRUE key counts
+ * as a signature with every bit set, so its distance to a plain
+ * signature is the number of bits that signature has clear.
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+   if ( ISALLTRUE(a) && ISALLTRUE(b) )
+       return 0;
+   if ( ISALLTRUE(a) )
+       return SIGLENBIT-sizebitvec(GETSIGN(b));
+   if ( ISALLTRUE(b) )
+       return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+   return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * GiST penalty method: cost of inserting the new entry (argument 1)
+ * under the existing entry (argument 0); the cost is stored through
+ * argument 2 and that pointer is returned.
+ *
+ * The cost is the Hamming distance between the two signatures; an array
+ * key is first turned into a signature.  For an array key against an
+ * ALLISTRUE original the number of clear bits is scaled by
+ * 1/(SIGLENBIT+1), i.e. the result is below 1.
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+   GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+   float      *penalty = (float *) PG_GETARG_POINTER(2);
+   GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+   GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+   /* only dereferenced below when origval is not ALLISTRUE */
+   BITVECP     orig = GETSIGN(origval);
+
+   *penalty = 0.0;
+
+   if (ISARRKEY(newval)) {
+       BITVEC sign;
+       makesign(sign, newval);
+
+       if ( ISALLTRUE(origval) ) 
+           *penalty=((float)(SIGLENBIT-sizebitvec(sign)))/(float)(SIGLENBIT+1);
+       else 
+           *penalty=hemdistsign(sign,orig);
+   } else {
+       *penalty=hemdist(origval,newval);
+   }
+   PG_RETURN_POINTER(penalty);
+}
+
+/*
+ * Signature of one entry as cached by picksplit: either the all-true
+ * marker or the signature bytes themselves (no key header).
+ */
+typedef struct
+{
+   bool        allistrue;
+   BITVEC      sign;
+}  CACHESIGN;
+
+/*
+ * Fill a cache slot from an index key: array keys are hashed into a
+ * signature, ALLISTRUE keys only set the flag, plain signatures are
+ * copied.
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+   if (ISARRKEY(key))
+   {
+       item->allistrue = false;
+       makesign(item->sign, key);
+       return;
+   }
+
+   item->allistrue = ISALLTRUE(key) ? true : false;
+   if (!item->allistrue)
+       memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+}
+
+/* Balancing term used by picksplit: -((a-b)^3) * c, as a double. */
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+/* Sort record of picksplit: an entry offset and its cost. */
+typedef struct
+{
+   OffsetNumber pos;
+   int4        cost;
+} SPLITCOST;
+
+/*
+ * qsort() comparator: orders SPLITCOST records by ascending cost.
+ */
+static int
+comparecost(const void *a, const void *b)
+{
+   int4        lhs = ((SPLITCOST *) a)->cost;
+   int4        rhs = ((SPLITCOST *) b)->cost;
+
+   return (lhs > rhs) - (lhs < rhs);
+}
+
+
+/*
+ * Hamming distance between two cached signatures.  An all-true entry
+ * counts as a signature with every bit set.
+ */
+static int
+hemdistcache(CACHESIGN   *a, CACHESIGN   *b) {
+   if ( a->allistrue && b->allistrue )
+       return 0;
+   if ( a->allistrue )
+       return SIGLENBIT-sizebitvec(b->sign);
+   if ( b->allistrue )
+       return SIGLENBIT-sizebitvec(a->sign);
+
+   return hemdistsign( a->sign, b->sign );
+}
+
+/*
+ * GiST picksplit method: distribute the entries of the entry vector
+ * (argument 0) over the left and right halves of the split vector
+ * (argument 1), which is also the return value.
+ *
+ * Steps:
+ *  1. cache a signature for every entry and pick as seeds the pair with
+ *     the largest Hamming distance (the last entry is not considered as
+ *     a seed);
+ *  2. start the left/right union keys from the two seeds;
+ *  3. sort all entries by |distance to seed 1 - distance to seed 2|;
+ *  4. hand each entry to the side whose current union is closer, with
+ *     WISH_F() biasing the choice towards the smaller side, and OR its
+ *     signature into that union.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+   OffsetNumber k,
+               j;
+   GISTTYPE   *datum_l,
+              *datum_r;
+   BITVECP     union_l,
+               union_r;
+   int4        size_alpha,
+               size_beta;
+   int4        size_waste,
+               waste = -1;
+   int4        nbytes;
+   OffsetNumber seed_1 = 0,
+               seed_2 = 0;
+   OffsetNumber *left,
+              *right;
+   OffsetNumber maxoff;
+   BITVECP     ptr;
+   int         i;
+   CACHESIGN  *cache;
+   SPLITCOST  *costvector;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+   cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+   fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+   /*
+    * seed search: the cache slots are filled during the first pass of
+    * the outer loop
+    */
+   for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+       for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+           if (k == FirstOffsetNumber)
+               fillcache(&cache[j], GETENTRY(entryvec, j));
+
+           size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+           if (size_waste > waste) {
+               waste = size_waste;
+               seed_1 = k;
+               seed_2 = j;
+           }
+       }
+   }
+
+   left = v->spl_left;
+   v->spl_nleft = 0;
+   right = v->spl_right;
+   v->spl_nright = 0;
+
+   /* no pair was examined: fall back to the first two entries */
+   if (seed_1 == 0 || seed_2 == 0) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
+   /* form initial .. */
+   if (cache[seed_1].allistrue) {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_l->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_l->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+   }
+   if (cache[seed_2].allistrue) {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_r->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_r->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+   }
+
+   union_l=GETSIGN(datum_l);
+   union_r=GETSIGN(datum_r);
+   /* from here on the last entry takes part as well */
+   maxoff = OffsetNumberNext(maxoff);
+   fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+   /* sort before ... */
+   costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+   for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+       costvector[j - 1].pos = j;
+       size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+       size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+       costvector[j - 1].cost = abs(size_alpha - size_beta);
+   }
+   qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+   for (k = 0; k < maxoff; k++) {
+       j = costvector[k].pos;
+       /* the seeds stay on their own side */
+       if (j == seed_1) {
+           *left++ = j;
+           v->spl_nleft++;
+           continue;
+       } else if (j == seed_2) {
+           *right++ = j;
+           v->spl_nright++;
+           continue;
+       }
+
+       /*
+        * Distance of entry j to each union.  When exactly one side is
+        * all-true the distance is the number of clear bits of the other
+        * one.  cache[j].sign is a bare signature without a key header,
+        * so it is passed as it is; GETSIGN() is only for GISTTYPE keys.
+        */
+       if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+               size_alpha=0;
+           else
+               size_alpha = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign
+               );
+       } else {
+           size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+       }
+
+       if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+               size_beta=0;
+           else
+               size_beta = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign
+               );
+       } else {
+           size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+       }
+
+       if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+           if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+               /* an all-true entry saturates a plain union signature */
+               if (! ISALLTRUE(datum_l) )
+                   MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_l[i] |= ptr[i];
+               );
+           }
+           *left++ = j;
+           v->spl_nleft++;
+       } else {
+           if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_r) )
+                   MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_r[i] |= ptr[i];
+               );
+           }
+           *right++ = j;
+           v->spl_nright++;
+       }
+   }
+
+   /* terminate both offset lists */
+   *right = *left = FirstOffsetNumber;
+   pfree(costvector);
+   pfree(cache);
+   v->spl_ldatum = PointerGetDatum(datum_l);
+   v->spl_rdatum = PointerGetDatum(datum_r);
+
+   PG_RETURN_POINTER(v);
+}


diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+/* number of bits per signature byte */
+#define BITBYTE 8
+/* signature length in int4 units */
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+/* signature length in bytes and in bits */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+/* a signature (SIGLEN bytes) and a pointer to signature bytes */
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+#define LOOPBYTE(a) \
+       for(i=0;i
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i
+                               a;\
+               }
+
+/* byte of the signature x that holds bit number i */
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+/* bit number i (0..7) of the single byte x; both arguments are
+ * parenthesized so that any expression can be passed safely */
+#define GETBITBYTE(x,i) ( ( ((char)(x)) >> (i) ) & 0x01 )
+/* clear, set and read bit number i of the signature x */
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+/* note: abs and min evaluate their arguments more than once */
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+/* bit number selected by the value val, and setting that bit in sign */
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ *
+ * len is the total size of the key in bytes, header included; flag is a
+ * combination of the *KEY / ALLISTRUE bits below.  The payload follows
+ * the two header fields: an int4 array for ARRKEY, SIGLEN signature
+ * bytes for SIGNKEY, nothing for SIGNKEY | ALLISTRUE.
+ */
+typedef struct
+{
+   int4        len;
+   int4        flag;
+   char        data[1];
+}  GISTTYPE;
+
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+/* flag tests; the argument is parenthesized before the cast so that
+ * pointer expressions such as (p + 1) are handled correctly */
+#define ISARRKEY(x) ( ((GISTTYPE*)(x))->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)(x))->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)(x))->flag & ALLISTRUE )
+
+/* size of the key header, and total key size for a flag combination
+ * (len is the number of array elements, used for ARRKEY only) */
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+/* payload accessors: signature bytes, array values, number of values */
+#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)(x)+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)(x))->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "spell.h"
+
+#define MAXNORMLEN 56
+
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/* qsort comparator: orders SPELL entries by their word using strcmp */
+static int cmpspell(const void *s1,const void *s2){
+   const SPELL *left = (const SPELL *) s1;
+   const SPELL *right = (const SPELL *) s2;
+
+   return strcmp(left->word, right->word);
+}
+
+/* Lower-case a NUL-terminated string in place, byte by byte (tolower) */
+static void 
+strlower( char * str ) {
+   unsigned char *cursor;
+
+   for (cursor = (unsigned char *) str; *cursor; cursor++)
+       *cursor = tolower( *cursor );
+}
+
+/*
+ * Backward string compare for suffix tree operations: the strings are
+ * compared from their last byte towards the first.  Returns -1, 0 or 1;
+ * when one string is a tail of the other, the shorter one sorts lower.
+ */
+static int 
+strbcmp(const char *s1, const char *s2) { 
+   int i1, i2;
+
+   for (i1 = strlen(s1)-1, i2 = strlen(s2)-1; i1 >= 0 && i2 >= 0; i1--, i2--) {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+   if (i1 != i2)
+       return (i1 < i2) ? -1 : 1;
+
+   return 0;
+}
+/*
+ * Like strbcmp, but looks at no more than 'count' trailing bytes.
+ * Returns 0 as soon as 'count' bytes matched.
+ */
+static int 
+strbncmp(const char *s1, const char *s2, size_t count) { 
+   int i1 = strlen(s1) - 1;
+   int i2 = strlen(s2) - 1;
+   int left = count;
+
+   for (; i1 >= 0 && i2 >= 0 && left > 0; i1--, i2--, left--) {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+   /* all requested bytes matched, or both strings ended together */
+   if (left == 0 || i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+
+/*
+ * qsort comparator for AFFIX entries: first by type, then by repl --
+ * forward (strcmp) for type 'p', backward (strbcmp) for any other type.
+ */
+static int 
+cmpaffix(const void *s1,const void *s2){
+   const AFFIX *a = (const AFFIX *) s1;
+   const AFFIX *b = (const AFFIX *) s2;
+
+   if (a->type != b->type)
+       return (a->type < b->type) ? -1 : 1;
+   if (a->type == 'p')
+       return(strcmp(a->repl, b->repl));
+   return(strbcmp(a->repl, b->repl));
+}
+
+/*
+ * Append one dictionary entry to Conf->Spell.
+ *
+ * The array grows in steps of 1024*20 entries.  'word' is duplicated with
+ * strdup(); 'flag' is copied into the fixed-size SPELL.flag buffer and is
+ * truncated if it does not fit.  Reports elog(ERROR) when memory cannot be
+ * obtained.  Always returns 0.
+ */
+int 
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   SPELL *entry;
+
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell"); 
+   }
+   entry = &Conf->Spell[Conf->nspell];
+   entry->word=strdup(word);
+   if ( !entry->word ) 
+       elog(ERROR,"No memory for AddSpell");
+   /*
+    * strncpy() does not terminate the destination when the source fills it,
+    * and the flag string is searched with strchr() later on, so leave room
+    * for the terminator and always write it.
+    */
+   strncpy(entry->flag,flag,sizeof(entry->flag)-1);
+   entry->flag[sizeof(entry->flag)-1]='\0';
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * Read a dictionary file and add every line to Conf via AddSpell().
+ *
+ * Each line is "word" or "word/flags".  The part after '/' is kept up to
+ * the first character that is not an ASCII letter; the word part is
+ * lower-cased.  Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportDictionary(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];  
+   FILE *dict;
+
+   if(!(dict=fopen(filename,"r")))return(1);
+   while(fgets(str,sizeof(str),dict)){
+       unsigned char *s;
+       const unsigned char *flag;
+
+           flag = NULL;
+       if((s=strchr(str,'/'))){
+           /* split the line at '/': str is the word, flag the rest */
+           *s=0;
+           s++;flag=s;
+           /* cut the flag string at the first non-letter */
+           while(*s){
+               if (((*s>='A')&&(*s<='Z'))||((*s>='a')&&(*s<='z')))
+                   s++;
+               else {
+                   *s=0;
+                   break;
+               }
+           }
+       }else{
+           flag="";
+       }
+       /* only the word is lower-cased: str ends at the former '/' */
+       strlower(str);
+       /* cut the word at CR / LF (relevant for lines without a '/') */
+       s=str;
+       while(*s){
+           if(*s=='\r')*s=0;
+           if(*s=='\n')*s=0;
+           s++;
+       }
+       AddSpell(Conf,str,flag);
+   }
+   fclose(dict);
+   return(0);
+}
+
+
+/*
+ * Look up 'word' in Conf->Spell.
+ *
+ * The search is limited to the index range recorded in Conf->SpellTree for
+ * the first byte of the word.  An entry matches when its word is equal to
+ * 'word' and, if 'affixflag' is non-zero, its flag string contains that
+ * character.  Returns a pointer to the matching entry or NULL.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int l,c,r,resc,resl,resr, i;
+
+   /* range of entries starting with the same byte as 'word' */
+   i = (int)(*word) & 255;
+   l = Conf->SpellTree.Left[i];
+   r = Conf->SpellTree.Right[i];
+   if (l == -1) return (NULL);
+   while(l<=r){
+       /* every round probes the middle, the left and the right entry */
+       c = (l + r) >> 1;
+       resc = strcmp(Conf->Spell[c].word, word);
+       if( (resc == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[c]);
+       }
+       resl = strcmp(Conf->Spell[l].word, word);
+       if( (resl == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[l]);
+       }
+       resr = strcmp(Conf->Spell[r].word, word);
+       if( (resr == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[r]);
+       }
+       /*
+        * Halve the range on the side indicated by the middle probe and
+        * step the other end by one.  When the middle word is equal but
+        * its flags did not match, both ends just move inwards by one.
+        */
+       if(resc < 0){
+           l = c + 1;
+           r--;
+       } else if(resc > 0){
+           r = c - 1;
+           l++;
+       } else {
+           l++;
+           r--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * Append one affix rule to Conf->Affix.
+ *
+ * 'mask' is stored as a regular expression source, anchored with a trailing
+ * '$' for suffixes (type 's') and a leading '^' otherwise; it is compiled
+ * lazily (compile = 1 marks "not compiled yet").  'find' and 'repl' are
+ * copied as is and replen is set to strlen(repl).
+ *
+ * mask, find and repl live in fixed-size buffers inside AFFIX, so a rule
+ * that does not fit is rejected with elog(ERROR) instead of being written
+ * past the end of those buffers.  Also reports elog(ERROR) when memory
+ * cannot be obtained.  Always returns 0.
+ */
+int 
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   AFFIX *affix;
+
+   /* mask needs room for one anchor character plus the terminator */
+   if ( strlen(mask)+2 > sizeof(affix->mask) ||
+        strlen(find)+1 > sizeof(affix->find) ||
+        strlen(repl)+1 > sizeof(affix->repl) )
+       elog(ERROR,"Affix rule is too long");
+
+   if(Conf->naffixes>=Conf->maffixes){
+       if(Conf->maffixes){
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }else{
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL ) 
+           elog(ERROR,"No memory for AddAffix");
+   }
+   affix = &Conf->Affix[Conf->naffixes];
+   if (type=='s') {
+       sprintf(affix->mask,"%s$",mask);
+   } else {
+       sprintf(affix->mask,"^%s",mask);
+   }
+   affix->compile = 1;
+   affix->flag=flag;
+   affix->type=type;
+   
+   strcpy(affix->find,find);
+   strcpy(affix->repl,repl);
+   affix->replen=strlen(repl);
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy 'src' to 'dist' leaving out every space, '-' and tab character.
+ * Returns 'dist'.
+ */
+static char * 
+remove_spaces(char *dist,char *src){
+   char *out = dist;
+   char *in;
+
+   for (in = src; *in; in++) {
+       if (*in == ' ' || *in == '-' || *in == '\t')
+           continue;
+       *out++ = *in;
+   }
+   *out=0;
+   return(dist);
+}
+
+
+/*
+ * Read an affix file and register every rule with AddAffix().
+ *
+ * Lines starting with "suffixes" / "prefixes" select the kind of the rules
+ * that follow, "flag X" sets the flag character for the following rules,
+ * and everything after '#' is ignored.  A rule line has the form
+ * "mask > find , repl" or "mask > repl"; blanks, tabs and '-' are removed
+ * from all three parts.  Returns 1 if the file cannot be opened, 0
+ * otherwise.
+ */
+int 
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           /*
+            * strchr() also matches the terminating NUL, so test for the
+            * end of the line explicitly or the scan runs past the buffer.
+            */
+           while(*s && strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       /* rules are only meaningful inside a suffixes/prefixes section */
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* str is no longer needed after parsing: reuse it as scratch space */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               /* only one part after '>': it is the replacement */
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+       
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+       
+   }
+   fclose(affix);
+       
+   return(0);
+}
+
+/*
+ * Sort Conf->Spell by word and record in Conf->SpellTree, for every first
+ * byte, the index of the first (Left) and last (Right) entry starting with
+ * it.  Left stays -1 for bytes no word starts with.
+ */
+void 
+SortDictionary(IspellDict * Conf){
+   int     prev = -1;
+   int     first;
+   size_t  idx;
+
+   qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+   for(idx = 0; idx < 256 ; idx++ )
+       Conf->SpellTree.Left[idx] = -1;
+
+   for(idx = 0; idx < Conf->nspell; idx++) {
+       first = (int)(*(Conf->Spell[idx].word)) & 255;
+       /* a new first byte begins here */
+       if (first != prev) {
+           Conf->SpellTree.Left[first] = idx;
+           prev = first;
+       }
+       Conf->SpellTree.Right[first] = idx;
+   }
+}
+
+/*
+ * Sort Conf->Affix with cmpaffix and build the lookup ranges:
+ * PrefixTree is indexed by the first byte of repl of type 'p' rules,
+ * SuffixTree by the last byte of repl of all other rules (0 when repl is
+ * empty).  Left/Right hold the first/last array index, -1 when unused.
+ */
+void 
+SortAffixes(IspellDict * Conf) {
+  int   CurLetP = -1, CurLetS = -1, Let;
+  AFFIX *Affix; size_t i;
+  
+  if (Conf->naffixes > 1)
+    qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
+  /* mark every range as empty */
+  for(i = 0; i < 256; i++) {
+      Conf->PrefixTree.Left[i] = Conf->PrefixTree.Right[i] = -1;
+      Conf->SuffixTree.Left[i] = Conf->SuffixTree.Right[i] = -1;
+  }
+
+  for(i = 0; i < Conf->naffixes; i++) {
+    Affix = &(((AFFIX*)Conf->Affix)[i]);
+    if(Affix->type == 'p') {
+      /* prefix rule: key is the first byte of repl */
+      Let = (int)(*(Affix->repl)) & 255;
+      if (CurLetP != Let) {
+   Conf->PrefixTree.Left[Let] = i;
+   CurLetP = Let;
+      }
+      Conf->PrefixTree.Right[Let] = i;
+    } else {
+      /* suffix rule: key is the last byte of repl, 0 for an empty repl */
+      Let = (Affix->replen) ? (int)(Affix->repl[Affix->replen-1]) & 255 : 0;
+      if (CurLetS != Let) {
+   Conf->SuffixTree.Left[Let] = i;
+   CurLetS = Let;
+      }
+      Conf->SuffixTree.Right[Let] = i;
+    }
+  }
+}
+
+/*
+ * Try to undo one suffix rule on 'word' (of length 'len').
+ *
+ * If the word ends with Affix->repl, that ending is replaced by Affix->find;
+ * the candidate must then match the rule's mask and be present in the
+ * dictionary with the rule's flag.  *res receives the result of comparing
+ * the word ending with repl.  Returns a pstrdup'ed candidate or NULL.
+ */
+static char * 
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) {
+  regmatch_t subs[2]; /* workaround for apache&linux */
+  char newword[2*MAXNORMLEN] = "";
+  int err;
+  
+  *res = strbncmp(word, Affix->repl, Affix->replen);
+  if (*res < 0) {
+    return NULL;
+  }
+  if (*res > 0) {
+    return NULL;
+  }
+  /* build the candidate: word without repl at the end, plus find */
+  strcpy(newword, word);
+  strcpy(newword+len-Affix->replen, Affix->find);
+
+  /* the mask is compiled on first use; compile == 0 means "ready" */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return(NULL);
+    }
+    Affix->compile = 0;
+  }
+  /* regexec returns 0 on a match */
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    if(FindWord(Conf, newword, Affix->flag))
+   return pstrdup(newword);    
+  }
+  return NULL;
+}
+
+#define NS 1
+#define MAX_NORM 512   /* number of slots in the array of normal forms */
+/*
+ * Try to undo one prefix rule on 'word'.
+ *
+ * If the word starts with Affix->repl, that beginning is replaced by
+ * Affix->find.  When the candidate matches the rule's mask, it is appended
+ * to the result list if the dictionary contains it with the rule's flag;
+ * in addition the first suffix rule of bucket 'pi' is tried on the
+ * candidate.  'forms' is the start of the result array and '*cur' its
+ * current write position, which is advanced and kept NULL-terminated.
+ *
+ * Returns the strncmp result when the word does not start with repl
+ * (callers use it to steer their search), 0 in all other cases.
+ */
+static int 
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur ) {
+  regmatch_t subs[NS*2];
+  char newword[2*MAXNORMLEN] = "";
+  int err, ls, res, lres;
+  size_t newlen;
+  AFFIX *CAffix = Conf->Affix;
+  
+  res = strncmp(word, Affix->repl, Affix->replen);
+  if (res != 0) {
+    return res;
+  }
+  /* build the candidate: find followed by the word without repl */
+  strcpy(newword, Affix->find);
+  strcat(newword, word+Affix->replen);
+
+  /* the mask is compiled on first use; compile == 0 means "ready" */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return (0);
+    }
+    Affix->compile = 0;
+  }
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    SPELL * curspell;
+
+    if((curspell=FindWord(Conf, newword, Affix->flag))){
+      /* keep one slot free for the terminating NULL */
+      if ((*cur - forms) < (MAX_NORM-1)) {
+   **cur =  pstrdup(newword);
+   (*cur)++; **cur = NULL;
+      }
+    } 
+    /* also try the first suffix rule of bucket 'pi' on the candidate */
+    newlen = strlen(newword);
+    ls = Conf->SuffixTree.Left[pi];
+      if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
+   **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
+   if (**cur) {
+     (*cur)++; **cur = NULL;
+   }
+      }
+  }
+  return 0;
+}
+
+
+/*
+ * Find the normal (dictionary) forms of 'word'.
+ *
+ * The word is lower-cased in place.  The result is a palloc'd,
+ * NULL-terminated array of at most MAX_NORM-1 palloc'd strings: the word
+ * itself if the dictionary contains it, plus every form obtained by
+ * undoing prefix and suffix rules.  Returns NULL when the word is empty,
+ * longer than MAXNORMLEN, or when no form was found.
+ */
+char ** 
+NormalizeWord(IspellDict * Conf,char *word){
+/*regmatch_t subs[NS];*/
+size_t len;
+char ** forms;
+char **cur;
+AFFIX * Affix;
+int ri, pi, ipi, lp, rp, cp, ls, rs;
+int lres, rres, cres = 0;
+  SPELL *spell;
+
+   len=strlen(word);
+   /*
+    * An empty word has no last character: reading word[len-1] would access
+    * memory before the buffer, and a zero 'pi' would never advance the
+    * "ipi += pi" loop below.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return(NULL);
+
+   strlower(word);
+
+   forms=(char **) palloc(MAX_NORM*sizeof(char *));
+   cur=forms;*cur=NULL;
+
+   /* ri: first byte (prefix bucket), pi: last byte (suffix bucket) */
+   ri = (int)(*word) & 255;
+   pi = (int)(word[len-1]) & 255;
+   Affix=(AFFIX*)Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if((spell = FindWord(Conf, word, 0))){
+       *cur=pstrdup(word);
+       cur++;*cur=NULL;
+   }
+
+   /* Find all other NORMAL forms of the 'word' */
+
+   /*
+    * Two passes: ipi == 0 (the suffix bucket of rules with an empty repl)
+    * and ipi == pi (the bucket of the word's last byte).
+    */
+   for (ipi = 0; ipi <= pi; ipi += pi) {
+
+       /* check prefix */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp) {
+         /* probe the middle, left and right rule of the current range */
+         cp = (lp + rp) >> 1;
+         cres = 0;
+         if ((cur - forms) < (MAX_NORM-1)) {
+       cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+         }
+         if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+       lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+         }
+         if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+       rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+         }
+         /* narrow the range according to the middle probe */
+         if (cres < 0) {
+       rp = cp - 1;
+       lp++;
+         } else if (cres > 0) {
+       lp = cp + 1;
+       rp--;
+         } else {
+       lp++;
+       rp--;
+         }
+       }
+
+       /* check suffix */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       /* every rule of the bucket is tried, walking in from both ends */
+       while (ls >= 0 && ls <= rs) {
+         if (  ((cur - forms) < (MAX_NORM-1)) ) {
+       *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+       if (*cur) {
+         cur++; *cur = NULL;
+       }
+         }
+         if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+       *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+       if (*cur) {
+         cur++; *cur = NULL;
+       }
+         }
+         ls++;
+         rs--;
+       } /* end while */
+     
+   } /* for ipi */
+
+   if(cur==forms){
+       pfree(forms);
+       return(NULL);
+   }
+   return(forms);
+}
+
+/*
+ * Release everything owned by an IspellDict: compiled regular expressions,
+ * the duplicated words, both arrays, and finally zero the structure itself.
+ */
+void 
+FreeIspell (IspellDict *Conf) {
+  int i;
+  AFFIX *Affix = (AFFIX *)Conf->Affix;
+
+  /* compile == 0 marks rules whose regex has actually been compiled */
+  for (i = 0; i < Conf->naffixes; i++) {
+    if (Affix[i].compile == 0) {
+      regfree(&(Affix[i].reg));
+    }
+  }
+  /* the words belong to the Spell array, so its own counter bounds the loop */
+  for (i = 0; i < Conf->nspell; i++) {
+   free( Conf->Spell[i].word );
+  }
+  free(Conf->Affix);
+  free(Conf->Spell);
+  memset( (void*)Conf, 0, sizeof(IspellDict) );
+  return;
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include 
+#include 
+
+/* one dictionary entry */
+typedef struct spell_struct {
+        char * word;    /* the word, allocated with strdup() by AddSpell */
+        char flag[10];  /* affix flag characters allowed for this word */
+} SPELL;
+
+/* one affix rule */
+typedef struct aff_struct {   
+        char flag;      /* flag character a dictionary word must carry */
+        char type;      /* 'p' for a prefix rule, 's' for a suffix rule */
+        char mask[33];  /* regex source, anchored with '^' or '$' by AddAffix */
+        char find[16];  /* text put in place of repl to build a candidate */
+        char repl[16];  /* text the inflected word starts/ends with */
+        regex_t reg;    /* compiled mask, valid only when compile == 0 */
+        size_t replen;  /* strlen(repl) */
+        char compile;   /* 1: mask still has to be compiled, 0: reg is ready */
+} AFFIX;
+
+/*
+ * Per-byte index into a sorted array: Left[b]/Right[b] are the first/last
+ * array positions filed under byte value b; -1 in Left means "none".
+ */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+/* a loaded ispell dictionary: affix rules, words and their lookup indexes */
+typedef struct {
+   int maffixes;   /* allocated number of elements in Affix */
+   int naffixes;   /* used number of elements in Affix */
+   AFFIX * Affix;
+
+   int nspell;     /* used number of elements in Spell */
+   int mspell;     /* allocated number of elements in Spell */
+   SPELL   *Spell;
+   Tree_struct SpellTree;  /* Spell ranges by first byte of the word */
+   Tree_struct PrefixTree; /* prefix rule ranges by first byte of repl */
+   Tree_struct SuffixTree; /* suffix rule ranges by last byte of repl */
+
+} IspellDict;
+
+char ** NormalizeWord(IspellDict * Conf,char *word);
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#define CS_WAITKEY 0
+#define CS_INKEY   1
+#define CS_WAITEQ  2
+#define CS_WAITVALUE   3
+#define CS_INVALUE 4
+#define CS_IN2VALUE    5
+#define CS_WAITDELIM   6
+#define CS_INESC   7
+#define CS_IN2ESC  8
+
+/*
+ * Return a palloc'd copy of the first 'len' bytes of 'ptr' with backslash
+ * escapes resolved: every backslash is dropped and the character following
+ * it is taken literally.  A backslash that is the last character is simply
+ * dropped.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+   char *res=palloc(len+1), *cptr;
+   memcpy(res,ptr,len);
+   res[len]='\0';
+   /* unescape in place: ptr reads, cptr writes, both inside the copy */
+   cptr = ptr = res;
+   while(*ptr) {
+       if ( *ptr == '\\' ) {
+           ptr++;
+           /* nothing follows the backslash: do not step over the terminator */
+           if ( *ptr == '\0' )
+               break;
+       }
+       *cptr=*ptr; ptr++; cptr++;
+   }
+   *cptr='\0';
+
+   return res;
+}
+
+/*
+ * Parse a dictionary option string of the form
+ *     key = value, key = "quoted value", ...
+ * into a palloc'd array of Map entries stored in *m.  The array is
+ * zero-filled and has room for at least one entry more than was parsed,
+ * so it ends with an all-zero entry.
+ *
+ * Keys consist of alphabetic characters.  A value is either enclosed in
+ * double quotes or runs up to the next white space or comma; in both forms
+ * a backslash makes the next character literal.  Syntax errors are
+ * reported with elog(ERROR).
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+   Map *mptr;
+   char *ptr=VARDATA(in), *begin=NULL;
+   int num=0;      /* an int: a char counter overflows on long inputs */
+   int state=CS_WAITKEY;
+
+   /* the number of commas bounds the number of key/value pairs */
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if ( *ptr==',' ) num++;
+       ptr++;
+   }
+
+   *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+   memset(mptr, 0, sizeof(Map)*(num+2) );
+   ptr=VARDATA(in);
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       /* ctype functions need an unsigned char value, hence the casts */
+       if (state==CS_WAITKEY) {
+           if (isalpha((unsigned char) *ptr)) {
+               begin=ptr;
+               state=CS_INKEY;
+           } else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if (state==CS_INKEY) {
+           if ( isspace((unsigned char) *ptr) ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITEQ;
+           } else if ( *ptr=='=' ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITVALUE;
+           } else if ( !isalpha((unsigned char) *ptr) ) 
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITEQ ) {
+           if ( *ptr=='=' )
+               state=CS_WAITVALUE;
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITVALUE ) {
+           if ( *ptr=='"' ) {
+               begin=ptr+1;
+               state=CS_INVALUE;
+           } else if ( !isspace((unsigned char) *ptr) ) {
+               begin=ptr;
+               state=CS_IN2VALUE;
+           }
+       } else if ( state==CS_INVALUE ) {
+           /* quoted value */
+           if ( *ptr=='"' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_INESC;
+       } else if ( state==CS_IN2VALUE ) {
+           /* unquoted value */
+           if ( isspace((unsigned char) *ptr) || *ptr==',' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               /* must come back to the unquoted state after the escape */
+               state=CS_IN2ESC;
+       } else if ( state==CS_WAITDELIM ) {
+           if ( *ptr==',' ) 
+               state=CS_WAITKEY; 
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state == CS_INESC ) {
+           state=CS_INVALUE;
+       } else if ( state == CS_IN2ESC ) {
+           state=CS_IN2VALUE;
+       } else 
+           elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int) (ptr-VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may simply end with the input */
+   if (state==CS_IN2VALUE) {
+       mptr->value = nstrdup(begin, ptr-begin);
+       mptr++;
+   } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) ) 
+       elog(ERROR,"Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery is parsed with the text parser
+ * and morphology is applied as well.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored flat: in the array of nodes the
+ * right child is always the next item and the left child is at item+item->left
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR   2
+
+/*
+ * Node of the query tree; also used by the parser to store the query
+ * in polish notation as a linked list.
+ */
+typedef struct NODE
+{
+   int2        weight;     /* weight bit mask of an operand (see get_weight) */
+   int2        type;       /* token type: VAL, OPR, VALTRUE, ... */
+   int4        val;        /* operator character, or crc32 of the operand */
+   int2        distance;   /* offset of the operand in the operand buffer */
+   int2        length;     /* length of the operand */
+   struct NODE *next;      /* node pushed before this one */
+}  NODE;
+
+/* state of the query parser */
+typedef struct
+{
+   char       *buf;        /* current position in the query string */
+   int4        state;      /* WAITOPERAND or WAITOPERATOR */
+   int4        count;      /* number of currently open parentheses */
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   int4        lenop;      /* allocated size of op */
+   int4        sumlen;     /* bytes used by operands, terminators included */
+   char       *op;         /* buffer holding the operands one after another */
+   char       *curop;      /* write position inside op */
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional ":abcd" weight suffix starting at 'buf'.
+ * *weight receives a bit mask (a = 1<<3, b = 1<<2, c = 1<<1, d = 1, letters
+ * in either case); it is 0 when 'buf' does not start with ':'.
+ * Returns the position of the first character that was not consumed.
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+   *weight = 0;
+
+   if ( *buf != ':' )
+       return buf;
+
+   for (buf++; *buf; buf++) {
+       int letter = tolower(*buf);
+
+       if (letter == 'a')
+           *weight |= 1<<3;
+       else if (letter == 'b')
+           *weight |= 1<<2;
+       else if (letter == 'c')
+           *weight |= 1<<1;
+       else if (letter == 'd')
+           *weight |= 1;
+       else
+           break;      /* not a weight letter: stop here */
+   }
+   
+   return buf;
+}
+
+/*
+ * Get the next token from the query string.
+ *
+ * Returns the token type.  For OPR, *val is the operator character
+ * ('!', '&' or '|').  For VAL, *strval / *lenval describe the operand and
+ * *weight holds the weight mask parsed after it.  state->state switches
+ * between WAITOPERAND and WAITOPERATOR, state->count tracks the
+ * parenthesis depth.  Blanks are skipped.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               if (*(state->buf) == '!')
+               {
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* hand over to the tsvector value parser */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       /* continue after the value and its optional weight */
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   /* more ')' than '(' is an error */
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* end of input with unclosed '(' is an error */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Push a new node in front of the polish-notation list (state->str).
+ * Reports elog(ERROR) when 'distance' or 'lenval' exceed their limits.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   node->weight = weight;
+   node->type = type;
+   node->val = val;
+
+   /* both values are stored in int2 fields, so check their range first */
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+   node->distance = distance;
+   node->length = lenval;
+
+   /* the new node becomes the head of the list */
+   node->next = state->str;
+   state->str = node;
+   state->num++;
+}
+
+/*
+ * This function is used for tsquery parsing.
+ * Pushes a node for the operand (val is the crc32 of the string) and
+ * appends the string itself, NUL-terminated, to the operand buffer.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   /* the node remembers the operand by its offset in the buffer */
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             state->curop - state->op, lenval, weight);
+
+   /* double the buffer until the operand and its terminator fit */
+   while (state->curop - state->op + lenval + 1 >= state->lenop)
+   {
+       int4        tmp = state->curop - state->op;
+
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+       state->curop = state->op + tmp;
+   }
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop += lenval;
+   *(state->curop) = '\0';
+   state->curop++;
+   state->sumlen += lenval + 1;
+   return;
+}
+
+/*
+ * This function is used for morph parsing.
+ * The operand is run through the text parser of the configuration
+ * state->cfg_id; every resulting word is pushed as a value and the words
+ * are joined with '&'.  If nothing is left, a VALTRUE node is pushed.
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT         prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       /* every word after the first is and-ed with the preceding ones */
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }   
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 ) 
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+#define STACKDEPTH 32
+/*
+ * Make polish notation of the query.
+ *
+ * Tokens are read with gettoken_query(); operands are handed to 'pushval',
+ * operators are kept on a local stack of STACKDEPTH entries until their
+ * operand has been pushed.  A '(' is handled by a recursive call, which
+ * returns at the matching ')'.  Returns END on success; syntax errors are
+ * reported with elog(ERROR).
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* emit the pending '&' and '!' operators */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               /* '|' is emitted at once when the stack is not empty */
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* parse the parenthesized sub-expression recursively */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* flush the remaining operators of this level */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush the remaining operators */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/* data needed to check a query operand against a tsvector (see exectsq) */
+typedef struct
+{
+   WordEntry  *arrb;       /* first word entry of the tsvector */
+   WordEntry  *arre;       /* one past the last word entry */
+   char       *values;     /* string area of the tsvector */
+   char       *operand;    /* operand area of the query */
+}  CHKVAL;
+
+/*
+ * Compare a tsvector word with a query operand: shorter sorts first,
+ * strings of equal length are compared with strncmp.
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   const char *word;
+   const char *operand;
+
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   word = &(chkval->values[ptr->pos]);
+   operand = &(chkval->operand[item->distance]);
+   return strncmp(word, operand, item->length);
+}
+
+/*
+ * Check weight info: returns true if at least one position of the word
+ * 'val' carries a weight that is set in the operand's weight mask.
+ * The positions follow the word's string: a uint16 counter at the
+ * short-aligned end of the string, then the WordEntryPos array.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       /* item->weight is a bit mask, ptr->weight a bit number */
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false; 
+}
+
+/*
+ * Is the operand 'val' present in the tsvector described by 'checkval'?
+ * Binary search over the word entries; when the operand has a weight mask
+ * and the word has positions, the weights are checked as well.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *chk = (CHKVAL *) checkval;
+   WordEntry  *lo = chk->arrb;
+   WordEntry  *hi = chk->arre;
+
+   /* Loop invariant: lo <= val < hi */
+
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = ValCompare(chk, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+       {
+           /* found the word */
+           if (val->weight && mid->haspos)
+               return checkclass_str(chk, mid, val);
+           return true;
+       }
+   }
+
+   return (false);
+}
+
+/*
+ * Evaluate the boolean query tree rooted at 'curitem'.
+ *
+ * A VAL item is decided by 'chkcond'.  Any other item is an operator:
+ * '!' negates its operand (curitem + 1) -- or counts as true when
+ * 'calcnot' is false -- '&' is AND and everything else is OR, with the
+ * operands at curitem + curitem->left and curitem + 1.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       if (TS_execute(curitem + 1, checkval, calcnot, chkcond))
+           return false;
+       return true;
+   }
+
+   if (curitem->val == (int4) '&')
+   {
+       /* the second operand is only evaluated when the first holds */
+       if (!TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+           return false;
+       return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+   }
+
+   /* |-operator: the second operand is only evaluated when the first fails */
+   if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+       return true;
+   return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+}
+
+/*
+ * boolean operations
+ *
+ * rexectsq is exectsq with the two arguments swapped.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   return DirectFunctionCall2(
+                              exectsq,
+                              PG_GETARG_DATUM(1),
+                              PG_GETARG_DATUM(0)
+       );
+}
+
+/*
+ * Does the tsvector (argument 0) satisfy the query (argument 1)?
+ * Returns false when either of them is empty.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   CHKVAL      chkval;
+   bool        result;
+
+   if (!val->size || !query->size)
+   {
+       PG_FREE_IF_COPY(val, 0);
+       PG_FREE_IF_COPY(query, 1);
+       PG_RETURN_BOOL(false);
+   }
+
+   /* describe the tsvector and the query operands for checkcondition_str */
+   chkval.arrb = ARRPTR(val);
+   chkval.arre = chkval.arrb + val->size;
+   chkval.values = STRPTR(val);
+   chkval.operand = GETOPERAND(query);
+   result = TS_execute(
+                    GETQUERY(query),
+                    &chkval,
+                    true,
+                    checkcondition_str
+       );
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * Walk the prefix-ordered item array starting at ptr[*pos] and fill in the
+ * "left" offset of every item: 0 for operands, 1 for '!', and for a binary
+ * operator the distance from the operator to its second operand subtree.
+ * On return *pos points just past the subtree that was processed.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+   ITEM       *curitem = &ptr[*pos];
+   int4        start = *pos;
+
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+
+   /* every case consumes the current item first */
+   (*pos)++;
+
+   if (curitem->type == VAL || curitem->type == VALTRUE)
+   {
+       curitem->left = 0;
+       return;
+   }
+
+   if (curitem->val == (int4) '!')
+   {
+       curitem->left = 1;
+       findoprnd(ptr, pos);
+       return;
+   }
+
+   /* binary operator: first subtree follows directly, second one after it */
+   findoprnd(ptr, pos);
+   curitem->left = *pos - start;
+   findoprnd(ptr, pos);
+}
+
+
+/*
+ * input
+ *
+ * Parse the query text in buf and build a QUERYTYPE from it.
+ *
+ * pushval is handed to makepol() together with the parser state and
+ * cfg_id is stored in that state (state.cfg_id).
+ * Raises ERROR "Empty query" when parsing produced no items.
+ *
+ * The result is one palloc'd chunk: header, state.num ITEMs, then
+ * state.sumlen bytes of operand text.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   /* each list node in state.str is copied into the array and then freed */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   /* dump every item as "op(left)" or "val(operand text)" */
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * Input function without morphology: the text is parsed by queryin with
+ * pushval_asis as the operand callback and configuration id 0.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   char       *input = (char *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *parsed = queryin(input, pushval_asis, 0);
+
+   PG_RETURN_POINTER(parsed);
+}
+
+/*
+ * out function
+ *
+ * Working state of infix(): a cursor over the item array plus a
+ * growable output buffer.
+ */
+typedef struct
+{
+   ITEM       *curpol;         /* item currently being printed */
+   char       *buf;            /* start of the palloc'd output buffer */
+   char       *cur;            /* write position inside buf */
+   char       *op;             /* base of operand text; items index it by distance */
+   int4        buflen;         /* allocated size of buf in bytes */
+}  INFIX;
+
+/*
+ * Make sure addsize more bytes plus a terminating NUL fit after inf->cur,
+ * doubling inf->buflen as often as needed.  repalloc may move the buffer,
+ * so inf->cur is re-based on the new inf->buf each time.
+ * Both arguments are parenthesized so any expression can be passed safely.
+ */
+#define RESIZEBUF(inf,addsize) \
+while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
+{ \
+   int4 len = (inf)->cur - (inf)->buf; \
+   (inf)->buflen *= 2; \
+   (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
+   (inf)->cur = (inf)->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the subtree starting at in->curpol into in->buf and leaves
+ * in->curpol just past that subtree.  "first" is true only for the
+ * outermost call; an '|' expression is wrapped in "( ... )" whenever
+ * first is false.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       /* worst case: every char escaped, two quotes, ':' plus four weight letters */
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           /* a quote inside the operand is written as \' */
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       /* weight is a bitmask: bit 3 = A, bit 2 = B, bit 1 = C, bit 0 = D */
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       /* an operator under '!' is parenthesized */
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm;
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       /*
+        * The right operand is stored first in the array but printed last,
+        * so it is rendered into a scratch buffer of its own.
+        */
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function: render the query in infix form as a C string.
+ * An empty query (size == 0) yields the empty string.
+ * The detoasted copy of the argument is released on every return path.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+
+   if (query->size == 0)
+   {
+       char       *b = palloc(1);
+
+       *b = '\0';
+       /* the early return used to skip freeing the detoasted copy */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(b);
+   }
+   nrm.curpol = GETQUERY(query);
+   nrm.buflen = 32;
+   nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+   *(nrm.cur) = '\0';
+   nrm.op = GETOPERAND(query);
+   infix(&nrm, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(nrm.buf);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * Returns a text value: empty for an empty query, "T" when clean_NOT_v2
+ * leaves nothing of the query, otherwise the infix form of the cleaned
+ * tree.  All temporary allocations are released before returning.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+   text       *res;
+   ITEM       *q;
+   int4        len;
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       /* the early return used to skip freeing the detoasted copy */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(res);
+   }
+
+   q = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (!q)
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+   else
+   {
+       int4        outlen;
+
+       nrm.curpol = q;
+       nrm.buflen = 32;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+       *(nrm.cur) = '\0';
+       nrm.op = GETOPERAND(query);
+       infix(&nrm, true);
+
+       /* text values are length-counted, so copy exactly outlen bytes */
+       outlen = nrm.cur - nrm.buf;
+       res = (text *) palloc(outlen + VARHDRSZ);
+       VARATT_SIZEP(res) = outlen + VARHDRSZ;
+       memcpy(VARDATA(res), nrm.buf, outlen);
+       pfree(nrm.buf);
+       pfree(q);
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * Build a query from text (argument 1) using the configuration id in
+ * argument 0.  Operands go through pushval_morph, then
+ * clean_fakeval_v2 rewrites the item array; if it returns nothing the
+ * query is turned into an empty one (size 0, header-only length).
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text       *in = PG_GETARG_TEXT_P(1);
+   char       *str = text2char(in);
+   QUERYTYPE  *query;
+   ITEM       *res;
+   int4        len;
+
+   PG_FREE_IF_COPY(in,1);
+
+   query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+   res = clean_fakeval_v2(GETQUERY(query), &len);
+   if (res)
+   {
+       /* overwrite the leading items with the cleaned version */
+       memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM));
+       pfree(res);
+   }
+   else
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+   }
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * to_tsquery variant taking the configuration by name (argument 0).
+ * The name is resolved with name2id_cfg and the call is forwarded to
+ * to_tsquery together with the query text (argument 1).
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+
+   /*
+    * name was fetched from argument 0, so compare against argument 0:
+    * with index 1 the pointers always differ and name would be pfree'd
+    * even when it is the caller's original datum.
+    */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsquery variant using the configuration returned by get_currcfg().
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum cfg = Int32GetDatum( get_currcfg() );
+   Datum txt = PG_GETARG_DATUM(0);
+
+   PG_RETURN_DATUM( DirectFunctionCall2(to_tsquery, cfg, txt) );
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * item in polish notation with back link
+ * to left operand
+ */
+typedef struct ITEM
+{
+   int8        type;           /* VAL, OPR, VALTRUE, ... (constants below) */
+   int8        weight;         /* bitmask of weight classes for an operand */
+   int2        left;           /* offset from this item to an operand subtree; 0 for operands */
+   int4        val;            /* for OPR items the operator character ('!', '&', '|') */
+   /* user-friendly value, must correlate with WordEntry */
+   uint32  
+       unused:1,
+       length:11,              /* length of the operand text */
+       distance:20;            /* offset of the operand text within the operand area */
+}  ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ */
+typedef struct
+{
+   int4        len;            /* total size of the value in bytes */
+   int4        size;           /* number of ITEMs that follow the header */
+   char        data[1];        /* start of the ITEM array; operand text comes after it */
+}  QUERYTYPE;
+
+/*
+ * Layout helpers for QUERYTYPE.  Macro arguments and results are fully
+ * parenthesized so they can be used inside larger expressions.
+ */
+/* size of the (len)(size) header */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+/* total bytes needed for "size" items plus "lenofoperand" bytes of operand text */
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+/* pointer to the first ITEM */
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+/* pointer to the operand text, which follows the ITEM array */
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+/*
+ * Item / token kinds.  VAL, OPR and VALTRUE are stored in ITEM.type;
+ * the others are presumably parser token codes -- TODO confirm in query.c.
+ */
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+/*
+ * Evaluate the query tree at curitem; chkcond decides each operand and
+ * calcnot selects whether '!' is really evaluated.
+ */
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevation
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+/* default per-class weights, indexed by WordEntryPos.weight */
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+/* weight of one position; relies on a variable named "w" in the caller's scope */
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+/* normalization method used when the caller does not pass one */
+#define DEF_NORM_METHOD    0
+
+/*
+ * Weight of a word collocation as a function of the distance w between
+ * the two words.  Distances above 100 get a fixed, tiny weight.
+ */
+static float4 word_distance ( int4 w ) {
+   if ( w<=100 )
+       return 1.0/(1.005+0.05*exp( ((float4)w)/1.5-2) );
+
+   return 1e-30;
+}
+
+/*
+ * Length measure of a tsvector: the sum over all entries of the number
+ * of stored positions, counting an entry without positions as one.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry   *entry;
+   WordEntry   *stop=(WordEntry*)STRPTR(t);
+   int total = 0;
+
+   for(entry=ARRPTR(t); entry < stop; entry++) {
+       int npos=POSDATALEN(t, entry);
+
+       total += ( npos == 0 ) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Order a tsvector entry against a query operand: shorter strings sort
+ * first, equal lengths are decided by strncmp over the operand length.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary search of the tsvector's entry array for the operand "item" of
+ * query q.  Returns the matching entry or NULL.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+   WordEntry  *lo = ARRPTR(t);
+   WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+   /* search window is the half-open range [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+           return mid;
+   }
+
+   return NULL;
+}
+
+/*
+ * Stand-in position list for entries stored without positions.
+ * Callers overwrite the start of element 0 with a uint16 count
+ * (lengthof(POSNULL)-1) and use element 1 as the single position.
+ * NOTE(review): the initializers assume WordEntryPos is {weight, pos} --
+ * confirm against tsvector.h.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank for a query whose root operator is '&'.
+ *
+ * For every pair of query operands found in the tsvector, and every pair
+ * of their positions, a pair weight
+ *     sqrt( w[weight1] * w[weight2] * word_distance(dist) )
+ * is folded into the result as res = 1 - (1-res)*(1-curw).
+ * Pairs at distance 0 are only counted when one side has no stored
+ * positions (POSNULL); they are then treated as MAXENTRYPOS apart.
+ * Returns -1.0 when no pair contributed.
+ *
+ * w is the weight table used by the wpos() macro.
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* pos[] holds uint16* elements, so size the clear by that type */
+   memset(pos,0,sizeof(uint16*) * q->size);
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+       /* position data is a uint16 count followed by the positions */
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       /* pair the current operand with every earlier one that was found */
+       for( k=0; k<i; k++ ) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw;
+                       if ( !dist ) dist=MAXENTRYPOS;
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res;
+}
+
+/*
+ * Rank for a query whose root is not '&'.
+ *
+ * Every position of every query operand found in the tsvector
+ * contributes its class weight wpos(); contributions are combined as
+ * res = 1 - (1-res)*(1-weight).  Entries without stored positions use
+ * the single POSNULL position.  Returns -1.0 when nothing matched.
+ *
+ * w is the weight table used by the wpos() macro.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Rank tsvector t against query q with weight table w.
+ *
+ * Returns 0 when either side is empty.  The AND formula is used when the
+ * root item is the '&' operator, the OR formula otherwise; a negative
+ * ("nothing matched") result becomes 1e-20.  method selects the length
+ * normalization: 0 none, 1 divide by log(length), 2 divide by length;
+ * anything else raises ERROR.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *root = GETQUERY(q);
+   float res;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   if ( root->type != VAL && root->val == (int4) '&' )
+       res = calc_rank_and(w,t,q);
+   else
+       res = calc_rank_or(w,t,q);
+
+   if ( res < 0 )
+       res = 1e-20;
+
+   switch(method) {
+       case 0: break;
+       case 1: res /= log((float)cnt_length(t)); break;
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   return res;
+}
+
+/*
+ * rank(weights[], tsvector, query [, method])
+ *
+ * The weight array must be one-dimensional with at least
+ * lengthof(weights) elements; a negative element selects the built-in
+ * default for that class and a value above 1.0 raises ERROR.
+ * The optional fourth argument is the normalization method.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 )
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+       elog(ERROR,"Array of weight is too short");
+
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 )
+           elog(ERROR,"Weight out of range");
+   }
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method);
+
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank(tsvector, query [, method]) using the built-in default weights.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int method = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   float res;
+
+   res=calc_rank(weights, txt, query, method);
+
+   PG_FREE_IF_COPY(txt, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/* one occurrence of a query operand in the document */
+typedef struct {
+   ITEM    *item;  /* the query operand that occurred */
+   int32   pos;    /* word position of the occurrence */
+} DocRepresentation;
+
+/*
+ * qsort comparator ordering DocRepresentation entries by position.
+ * Equal positions compare as 0, as the qsort contract requires
+ * (returning non-zero for both orders of an equal pair is inconsistent).
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+/* slice of a DocRepresentation array, passed to checkcondition_DR */
+typedef struct {
+   DocRepresentation *doc; /* first entry of the slice */
+   int len;                /* number of entries in the slice */
+}  ChkDocR;
+
+/*
+ * TS_execute callback: an operand is true when it occurs anywhere in
+ * the ChkDocR slice passed as checkval.
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *slice = (ChkDocR*)checkval;
+   int idx;
+
+   for(idx=0; idx < slice->len; idx++) {
+       if ( slice->doc[idx].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next candidate span of positions [*p, *q] in doc (sorted by
+ * position, len entries) that satisfies the query, starting the search
+ * at index *pos.
+ *
+ * Pass 1: for every operand take its first occurrence at or after
+ * doc[*pos]; *q becomes the largest such position and lastpos its index.
+ * If nothing was found the function returns false; if *q is the value
+ * already reported by the previous call, the search restarts one entry
+ * further on.
+ * Pass 2: for every operand scan backwards from lastpos down to *pos for
+ * its nearest occurrence; *p becomes the smallest such position and f
+ * the corresponding entry.
+ * The slice f..doc[lastpos] is then checked with TS_execute (with NOT
+ * evaluation disabled).  *pos is advanced past f either way; on failure
+ * the search continues recursively.
+ *
+ * The caller must preserve *pos and *q between successive calls.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               }
+               break;
+           }
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   }
+
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+
+   if ( *p<=*q ) {
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q);
+   }
+
+   return false;
+}
+
+/*
+ * Build the list of all occurrences of the query's operands in txt,
+ * sorted by position.
+ *
+ * Returns a palloc'd array with *doclen entries, or NULL (with
+ * *doclen == 0) when no operand occurs in the document.  Entries stored
+ * without positions contribute the single POSNULL position.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until the new positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+
+   if ( cur>0 ) {
+       if ( cur>1 )
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd(K, tsvector, query [, method])
+ *
+ * Sums a score over the spans reported by Cover(): a span of at most K
+ * words scores 1, a longer one K / span length.  K <= 0 selects 4.
+ * Returns 0 when no query operand occurs in the document.  The optional
+ * fourth argument selects the same length normalization as in rank().
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len;
+   int cur=0;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   if (K<=0)
+       K=4;
+
+   while( Cover(doc, len, query, &cur, &p, &q) ) {
+       int span = q-p+1;
+
+       res += ( span > K ) ? ((float)K)/((float)span) : 1.0;
+   }
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   switch(method) {
+       case 0: break;
+       case 1: res /= log((float)cnt_length(txt)); break;
+       case 2: res /= (float)cnt_length(txt); break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd(tsvector, query [, method]): forwards to rank_cd with K = -1,
+ * which makes rank_cd fall back to its default K.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum method = ( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD);
+
+   PG_RETURN_DATUM( DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   ));
+}
+
+/**************debug*************/
+
+/* one word occurrence of the document, used by get_covers */
+typedef struct {
+   char    *w;     /* word text inside the tsvector (not NUL-terminated) */
+   int2    len;    /* length of the word text */
+   int2    pos;    /* word position in the document */
+   int2    start;  /* number of the cover that starts here, 0 if none */
+   int2    finish; /* number of the cover that ends here, 0 if none */
+} DocWord;
+
+/*
+ * qsort comparator ordering DocWord entries by position.
+ * Equal positions compare as 0, as the qsort contract requires.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * Debug helper: returns the document's words in position order as text,
+ * with "{N " written before the first word and "}N " after the last
+ * word of the N-th span reported by Cover().
+ *
+ * Returns empty text when no query operand occurs in the document and
+ * raises ERROR "No pos info" if any tsvector entry lacks positions.
+ */
+Datum
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences in the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates word text plus a space */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;
+           dw[cur].len=pptr[i].len;
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark the first and last word of every cover */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       while(dwptr->pos < p && dwptr-dw<dlen)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr->pos < q+1 && dwptr-dw<dlen)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++;
+   }
+
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) {
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/* node of the pointer-linked form of a query tree */
+typedef struct NODE
+{
+   struct NODE *left;          /* subtree at valnode + valnode->left; NULL for '!' and operands */
+   struct NODE *right;         /* subtree at valnode + 1; NULL for operands */
+   ITEM       *valnode;        /* the item this node stands for (not owned by the node) */
+}  NODE;
+
+/*
+ * Build a pointer-linked tree from the flat item array starting at "in".
+ * Operators get their "right" child from the next item and, unless the
+ * operator is '!', their "left" child from in + in->left.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   node->valnode = in;
+   node->left = NULL;
+   node->right = NULL;
+
+   if (in->type != OPR)
+       return node;
+
+   node->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       node->left = maketree(in + in->left);
+
+   return node;
+}
+
+/* growable ITEM array being filled by plainnode() */
+typedef struct
+{
+   ITEM       *ptr;            /* palloc'd array */
+   int4        len;            /* allocated number of items */
+   int4        cur;            /* number of items written so far */
+}  PLAINTREE;
+
+/*
+ * Append the subtree "node" to the flat array in state, recomputing the
+ * "left" offsets of operators, and free the node afterwards.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   int4        slot;
+
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+
+   /* remember the index, not a pointer: recursion may repalloc the array */
+   slot = state->cur;
+   memcpy((void *) &(state->ptr[slot]), (void *) node->valnode, sizeof(ITEM));
+   state->cur++;
+
+   if (node->valnode->type != VAL)
+   {
+       if (node->valnode->val == (int4) '!')
+       {
+           state->ptr[slot].left = 1;
+           plainnode(state, node->right);
+       }
+       else
+       {
+           plainnode(state, node->right);
+           state->ptr[slot].left = state->cur - slot;
+           plainnode(state, node->left);
+       }
+   }
+   pfree(node);
+}
+
+/*
+ * Flatten a pointer-linked tree back into an ITEM array.
+ * Returns a palloc'd array and stores its item count in *len; returns
+ * NULL with *len == 0 when root is NULL or is neither VAL nor OPR.
+ * The tree nodes are freed while being flattened.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   pl;
+
+   pl.ptr = NULL;
+   pl.cur = 0;
+   pl.len = 16;
+
+   if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
+       plainnode(&pl, root);
+   }
+
+   *len = pl.cur;
+   return pl.ptr;
+}
+
+/*
+ * Free a tree of NODEs; a NULL argument is accepted.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive call handles a NULL child itself */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * clean tree for ! operator.
+ * It's useful for debug, but in
+ * other case, such view is used with search in index.
+ * Operator ! always return TRUE
+ *
+ * Returns the cleaned subtree, or NULL when the whole subtree has been
+ * removed (and freed):
+ *   - a '!' node is always removed;
+ *   - an '|' node is removed as soon as either operand is removed;
+ *   - an '&' node is replaced by its surviving operand, or removed when
+ *     both operands are removed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* freetree also releases whichever operand is still attached */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           /* only the operator node goes away; the operand is kept */
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Remove '!' subtrees from the flat query at ptr (see clean_NOT_intree).
+ * Returns a new palloc'd item array with its length in *len, or NULL
+ * when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *cleaned = clean_NOT_intree(maketree(ptr));
+
+   return plaintree(cleaned, len);
+}
+
+/* outcome reported by clean_fakeval_intree for a removed subtree */
+#define V_UNKNOWN  0   /* subtree was kept; value depends on the document */
+#define V_TRUE     1   /* subtree is always true */
+#define V_FALSE        2   /* subtree is always false */
+
+/*
+ * Clean query tree from values which is always in
+ * text (stopword)
+ *
+ * Returns the simplified subtree, or NULL when the subtree collapsed to
+ * a constant; in that case *result is set to V_TRUE or V_FALSE and the
+ * subtree has been freed.  *result is left untouched when a subtree is
+ * returned.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* NOT of a constant is the opposite constant */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       /* x | TRUE is TRUE; a FALSE side simply drops out */
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       /* x & FALSE is FALSE; a TRUE side simply drops out */
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Remove always-true operands from the flat query at ptr (see
+ * clean_fakeval_intree).  Returns a new palloc'd item array with its
+ * length in *len.  If the whole query collapses to a constant, a NOTICE
+ * is emitted, *len is set to 0 and NULL is returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        result = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &result);
+
+   if (result == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/*
+ * Comparator handed to qsort() and bsearch(): orders two SNMapEntry items
+ * by strcmp() of their keys.
+ */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *lhs = (const SNMapEntry *) a;
+   const SNMapEntry *rhs = (const SNMapEntry *) b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Append a (key, value) pair to the map and re-sort the list by key.
+ *
+ * The entry array grows with realloc(): 16 slots the first time, double the
+ * previous capacity afterwards.  The key is copied with strdup(), so the
+ * caller keeps ownership of its own string.  Either allocation failing is
+ * reported through elog(ERROR, ...).
+ */
+void
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if ( map->reallen <= map->len ) {
+       int newlen = map->reallen ? map->reallen * 2 : 16;
+       SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newlen);
+
+       if ( grown == NULL )
+           elog(ERROR, "No memory");
+       map->list = grown;
+       map->reallen = newlen;
+   }
+
+   slot = &map->list[ map->len ];
+   slot->key = strdup(key);
+   if ( slot->key == NULL )
+       elog(ERROR, "No memory");
+   slot->value = value;
+   map->len++;
+
+   /* keep the list ordered so that findSNMap() can use bsearch() */
+   if ( map->len > 1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Variant of addSNMap() taking the key as a text value: the key is
+ * converted with text2char(), inserted, and the temporary C string is
+ * released with pfree() (addSNMap() stores its own strdup() copy).
+ */
+void
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey = text2char( key );
+
+   addSNMap(map, ckey, value);
+   pfree(ckey);
+}
+
+/*
+ * Look key up in the map with bsearch() over the key-sorted list.
+ * Returns the stored value, or 0 when the map is empty or holds no entry
+ * with that key.
+ */
+Oid
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe = {key, 0};
+   SNMapEntry *hit;
+
+   if ( !map->list || map->len == 0 )
+       return 0;
+
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( hit == NULL )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Variant of findSNMap() taking the key as a text value.  The key is
+ * converted with text2char(), looked up, and the temporary C string is
+ * released with pfree().  Returns what findSNMap() returns (0 when the key
+ * is absent).
+ */
+Oid
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* was "int": keep the result in the type findSNMap() returns */
+
+   res = findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release everything the map owns -- each strdup()ed key and the entry
+ * array itself -- and then zero the SNMap structure.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *cur;
+
+       for ( cur = map->list; map->len; cur++, map->len-- )
+           if ( cur->key )
+               free(cur->key);
+       free( map->list );
+   }
+   memset(map, 0, sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One map entry: a string key and the Oid stored under it. */
+typedef struct {
+   char    *key;   /* strdup()ed copy of the key, released by freeSNMap() */
+   Oid value;  /* value returned by findSNMap() for this key */
+} SNMapEntry;
+
+/* String-to-Oid map kept as an array sorted by key (see addSNMap()). */
+typedef struct {
+   int len;    /* number of entries currently in use */
+   int reallen;    /* number of entries allocated in list */
+   SNMapEntry  *list;  /* realloc()ed entry array, NULL while empty */
+} SNMap;
+
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a zero-initialised stemmer environment with a fresh current
+ * string (z->p) plus S_size work strings, I_size integers and B_size
+ * booleans.
+ *
+ * Returns the new environment, or NULL when one of the calloc() calls
+ * fails; in that case everything allocated so far is released again.
+ * NOTE(review): create_s() is defined elsewhere and its failure behaviour is
+ * not visible here, so its result is used unchecked as before.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{
+    struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+
+    z->p = create_s();
+    if (S_size)
+    {   int i;
+        z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    /* each *_size field is set only after its array exists, so
+       SN_close_env() frees exactly what has been allocated so far */
+    SN_close_env(z);
+    return NULL;
+}
+
+/*
+ * Release an environment created by SN_create_env(): the S work strings
+ * (via lose_s), the S, I and B arrays, the current string z->p and finally
+ * the structure itself.  Passing NULL is a no-op, mirroring free(NULL).
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    if (z == NULL) return;
+    if (z->S_size)
+    {   int i;
+        for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
+        free(z->S);
+    }
+    if (z->I_size) free(z->I);
+    if (z->B_size) free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Load a new current string into the environment: replace_s() is asked to
+ * substitute positions 0..z->l of the buffer with the size symbols at s,
+ * then the cursor z->c is reset to 0.
+ * NOTE(review): replace_s() is defined elsewhere; it presumably also updates
+ * z->l to the new length -- confirm in utilities.c.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/* State of one stemmer instance; created by SN_create_env(). */
+struct SN_env {
+    symbol * p; /* current string, allocated with create_s() */
+    /* c: cursor; l: forward limit; lb: backward limit; bra/ket: bounds of the
+       current slice ("[" and "]" in the generated code).
+       NOTE(review): a is not referenced in the visible sources -- confirm its role. */
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size; /* element counts of S, I and B */
+    symbol * * S; /* work strings, each allocated with create_s() */
+    int * I; /* integer variables of the stemmer */
+    symbol * B; /* boolean variables of the stemmer */
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/*
+ * prelude: clears the Y_found flag (B[0]) and rewrites selected 'y'
+ * characters to 'Y', setting Y_found whenever it does so.  The first "do"
+ * handles a 'y' at the starting cursor position for which the following
+ * in_grouping(g_v) test succeeds; the "repeat" loop scans forward for each
+ * 'y' that comes right after a successful in_grouping(g_v) test.
+ * The cursor is restored before returning; always returns 1.
+ * NOTE(review): g_v presumably encodes the vowels -- confirm against the
+ * Snowball source of the stemmer.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * mark_regions: computes the marks p1 (I[0]) and p2 (I[1]).  Both start at
+ * the limit z->l.  p1 is set after either a match from table a_0 or one
+ * in_grouping(g_v) / out_grouping(g_v) "gopast" pair; p2 is set after one
+ * more such pair.  If the limit is reached first, the marks keep the values
+ * assigned so far.  The cursor is restored; always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * shortv: backward test that returns 1 when one of two patterns matches
+ * before the cursor, 0 otherwise.  Pattern one is out_grouping_b(g_v_WXY),
+ * in_grouping_b(g_v), out_grouping_b(g_v).  Pattern two is
+ * out_grouping_b(g_v), in_grouping_b(g_v) with the cursor then at the
+ * backward limit z->lb.
+ */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* R1 test: 1 when the cursor is at or beyond mark p1 (I[0]), otherwise 0. */
+static int r_R1(struct SN_env * z) {
+    return z->I[0] <= z->c;
+}
+
+/* R2 test: 1 when the cursor is at or beyond mark p2 (I[1]), otherwise 0. */
+static int r_R2(struct SN_env * z) {
+    return z->I[1] <= z->c;
+}
+
+/*
+ * Step_1a: looks backwards for a suffix from table a_1 and acts on its
+ * result code.  1: replace with "ss".  2: replace with "ie" when exactly one
+ * character lies between the backward limit and the suffix, otherwise with
+ * "i".  3: step back one character, search further back for an
+ * in_grouping_b(g_v) hit, then delete the suffix.  Other result codes leave
+ * the word unchanged.  Returns 0 when no suffix matches or a condition of
+ * case 3 fails, else 1.
+ */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step_1b: looks backwards for a suffix from table a_3.
+ * Result 1: if the suffix lies in R1, replace it with "ee".
+ * Result 2: require an in_grouping_b(g_v) hit somewhere before the suffix,
+ * delete the suffix, then inspect the new ending with table a_2:
+ *   1 -> insert "e" at the cursor;
+ *   2 -> delete the single character before the cursor;
+ *   3 -> insert "e" if the cursor is at mark I[0] and r_shortv() succeeds.
+ * Returns 0 as soon as a lookup or condition fails, else 1.
+ */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step_1c: replaces a final 'y' or 'Y' with "i" when it is preceded by a
+ * character accepted by out_grouping_b(g_v) and, after that test, the cursor
+ * is not at the backward limit.  Returns 1 after the replacement, otherwise 0.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/*
+ * Step_2: looks backwards for a suffix from table a_4; the suffix must lie
+ * in R1.  The result code selects the replacement string (s_13 .. s_28).
+ * Two cases carry an extra condition: 13 replaces with "og" only when an 'l'
+ * precedes the suffix, and 16 deletes the suffix only when
+ * in_grouping_b(g_valid_LI) accepts what precedes it.
+ * Returns 0 when nothing matches or a condition fails, else 1.
+ */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step_3: looks backwards for a suffix from table a_5; the suffix must lie
+ * in R1.  Results 1-4 replace it with "tion", "ate", "al" or "ic", result 5
+ * deletes it, and result 6 deletes it only if it also lies in R2.
+ * Returns 0 when nothing matches or a region test fails, else 1.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step_4: looks backwards for a suffix from table a_6; the suffix must lie
+ * in R2.  Result 1 deletes it; result 2 deletes it only when an 's' or a
+ * 't' precedes it.  Returns 0 when nothing matches or a condition fails,
+ * else 1.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step_5: looks backwards for a final "e" or "l" (table a_7).
+ * Result 1 ("e"): delete it when it lies in R2, or when it lies in R1 and
+ * r_shortv() fails at that point.
+ * Result 2 ("l"): delete it when it lies in R2 and another 'l' precedes it.
+ * Returns 0 when nothing matches or a condition fails, else 1.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * exception2: returns 1 when a table a_8 entry matches backwards from the
+ * cursor and the match reaches the backward limit z->lb, otherwise 0.
+ * The word is not modified.
+ */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (!(find_among_b(z, a_8, 8))) return 0; /* substring, line 147 */
+    z->bra = z->c; /* ], line 147 */
+    if (z->c > z->lb) return 0; /* atlimit, line 147 */
+    return 1;
+}
+
+/*
+ * exception1: forward lookup of the word in table a_9; the match must end
+ * at the limit z->l.  Result codes 1-11 replace the word with a fixed stem
+ * (s_36 .. s_46); other result codes leave it unchanged.
+ * Returns 0 when the word is not in the table, else 1.
+ */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * postlude: when the Y_found flag (B[0]) is set, scans forward and replaces
+ * every 'Y' with 'y', then returns 1.  Returns 0 without scanning when the
+ * flag is clear.
+ */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+/*
+ * Entry point of the English stemmer; works on the current string of z.
+ * If r_exception1() handles the word, that is the whole job.  Otherwise the
+ * word must be at least 3 symbols long from the cursor (else 0 is returned
+ * with the word untouched); then prelude and mark_regions run forwards, the
+ * direction is switched to backwards, Step_1a runs, and -- unless
+ * r_exception2() matches -- Steps 1b, 1c, 2, 3, 4 and 5 run in order, each
+ * with the cursor reset afterwards.  Finally postlude runs forwards.
+ * Returns 1 in every case other than the too-short word.
+ */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/* Create an environment for english_stem(): no work strings, two integers (the marks I[0], I[1]) and one boolean (Y_found, B[0]). */
+extern struct SN_env * english_create_env(void) { return SN_create_env(0, 2, 1); }
+
+/* Release an environment obtained from english_create_env(); forwards to SN_close_env(). */
+extern void english_close_env(struct SN_env * z) { SN_close_env(z); }
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Creation and disposal of the environment used by the English stemmer. */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/* Entry point of the English stemmer; works on the environment z. */
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+/* Integer extremes, taken from <limits.h>. */
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Number of bytes of bookkeeping kept in front of a symbol buffer: two ints.
+ * Parenthesized so the macro is safe inside any larger expression. */
+#define HEAD (2*sizeof(int))
+
+/* Accessors for the two ints stored immediately before a symbol buffer p:
+ * slot [-1] holds the size and slot [-2] the capacity.  Each expansion is
+ * fully parenthesized; SIZE and CAPACITY remain usable as lvalues. */
+#define SIZE(p)        (((int *)(p))[-1])
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    (((int *)(p))[-2])
+
+/* One row of a lookup table searched by find_among / find_among_b. */
+struct among
+{
+    int s_size;                         /* length of s, counted in symbols */
+    symbol * s;                         /* the string this row matches */
+    int substring_i;                    /* next row to try if this one is rejected; negative ends the chain */
+    int result;                         /* value handed back when this row is chosen */
+    int (* function)(struct SN_env *);  /* optional extra condition; 0 when absent */
+};
+
+/* Allocation and release of symbol buffers (buffers carry a HEAD-sized prefix). */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+/* Character-class tests against the bitmap s covering codes min..max.
+ * The _b variants look at the symbol before the cursor and move backwards. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+/* Plain numeric range tests on the symbol at (or, for _b, before) the cursor. */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+/* Literal string comparison at the cursor; the _v forms take a sized buffer. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+/* Table lookup in an array of struct among, forwards or backwards. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+/* Buffer growth and replacement of the slice z->bra .. z->ket. */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+/* Replacement of an arbitrary range bra .. ket, adjusting z->bra and z->ket. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+/* Copy the current slice, or the text up to z->l, into the buffer p. */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+/* Diagnostic dump of the environment to standard output. */
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point, followed by the file-local routines it is built from.
+ * Each routine takes the stemmer environment and returns an int flag. */
+extern int russian_stem(struct SN_env * z);
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Environment creation and disposal, defined at the end of this file. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Strings and table a_0, searched backwards by r_perfective_gerund.
+ * Rows are { s_size, s, substring_i, result, function }.
+ * NOTE(review): the byte values look like KOI8-R Cyrillic -- confirm. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Strings and table a_1, searched backwards by r_adjective.
+ * Every row carries result 1 and no condition function. */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Strings and table a_2, searched backwards by r_adjectival after
+ * r_adjective has succeeded.  Result 1 rows need a further context check in
+ * the caller; result 2 rows are deleted unconditionally. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* Strings and table a_3, searched backwards by r_reflexive. */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Strings and table a_4, searched backwards by r_verb.  Result 1 rows need a
+ * further context check in the caller; result 2 rows are deleted outright. */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Strings and table a_5, searched backwards by r_noun.
+ * Every row carries result 1 and no condition function. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* Strings and table a_6, searched backwards by r_derivational. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Strings and table a_7, searched backwards by r_tidy_up; the result
+ * (1, 2 or 3) selects which clean-up branch that routine takes. */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Bitmap handed to in_grouping / out_grouping by r_mark_regions together
+ * with the code range 192..220; bit (ch - 192) is set for members.
+ * NOTE(review): presumably the vowel set -- confirm against the Snowball source. */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* Single-symbol literals used with eq_s_b: s_0/s_1 in r_perfective_gerund,
+ * s_2/s_3 in r_adjectival, s_4/s_5 in r_verb, s_6..s_8 in r_tidy_up and
+ * s_9 in russian_stem. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the two marks z->I[0] (pV) and z->I[1] (p2) by scanning forward
+ * from the cursor.  Both start out as z->l and keep that value if the scan
+ * reaches the end of the text first.  The cursor is put back where it was,
+ * and the routine always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 100 */
+        /* advance until just past the first symbol that is in g_v */
+        while(1) { /* gopast, line 101 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+            break;
+        lab1:
+            if (z->c >= z->l) goto lab0; /* ran out of text: give up */
+            z->c++;
+        }
+        z->I[0] = z->c; /* setmark pV, line 101 */
+        /* then past the next symbol that is not in g_v */
+        while(1) { /* gopast, line 101 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+            break;
+        lab2:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        /* then once more past an in-g_v symbol ... */
+        while(1) { /* gopast, line 102 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+            break;
+        lab3:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        /* ... and past the following symbol that is not in g_v */
+        while(1) { /* gopast, line 102 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+            break;
+        lab4:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 102 */
+    lab0:
+        z->c = c; /* restore the cursor saved on entry */
+    }
+    return 1;
+}
+
+/* Yield 1 when the cursor is at or beyond the mark stored in z->I[1], else 0. */
+static int r_R2(struct SN_env * z) {
+    return z->I[1] <= z->c;
+}
+
+/* Look backwards from the cursor for an entry of a_0 and delete it.
+ * Returns 0 when no entry matches, or when a result-1 entry is not preceded
+ * by s_0 or s_1; returns 1 otherwise. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;                          /* slice ends at the cursor */
+    among_var = find_among_b(z, a_0, 9);
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* slice starts where the match stopped */
+
+    if (among_var == 1) {
+        /* the ending must follow s_0 or, failing that, s_1 */
+        int saved = z->l - z->c;
+        if (!eq_s_b(z, 1, s_0)) {
+            z->c = z->l - saved;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z);
+    } else if (among_var == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Look backwards from the cursor for an entry of a_1 and delete it.
+ * Returns 0 when nothing matches, 1 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_1, 26);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/* Run r_adjective and, if it succeeds, optionally strip one more ending
+ * taken from a_2.  Returns 0 only when r_adjective fails; the optional
+ * second step never causes a failure. */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    int start;
+
+    if (!r_adjective(z)) return 0;
+
+    start = z->l - z->c;                    /* where the optional step began */
+    z->ket = z->c;
+    among_var = find_among_b(z, a_2, 8);
+    if (among_var == 0) {
+        z->c = z->l - start;                /* nothing found: undo and succeed */
+        return 1;
+    }
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        /* the ending must follow s_2 or, failing that, s_3 */
+        int mark = z->l - z->c;
+        if (!eq_s_b(z, 1, s_2)) {
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_3)) {
+                /* the cursor goes back to the position after the a_2 match,
+                 * not to the start of the optional step */
+                z->c = z->l - mark;
+                return 1;
+            }
+        }
+        slice_del(z);
+    } else if (among_var == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Look backwards from the cursor for an entry of a_3 and delete it.
+ * Returns 0 when nothing matches, 1 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_3, 2);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/* Look backwards from the cursor for an entry of a_4 and delete it.
+ * Returns 0 when no entry matches, or when a result-1 entry is not preceded
+ * by s_4 or s_5; returns 1 otherwise. */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_4, 46);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        /* the ending must follow s_4 or, failing that, s_5 */
+        int saved = z->l - z->c;
+        if (!eq_s_b(z, 1, s_4)) {
+            z->c = z->l - saved;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z);
+    } else if (among_var == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Look backwards from the cursor for an entry of a_5 and delete it.
+ * Returns 0 when nothing matches, 1 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_5, 36);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/* Look backwards from the cursor for an entry of a_6 and delete it, but only
+ * if r_R2 accepts the position where the match begins.
+ * Returns 0 when nothing matches or r_R2 refuses, 1 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_6, 2);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (!r_R2(z)) return 0;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/* Final clean-up driven by table a_7.  The table result picks the branch:
+ *   1 - delete the match, then require two consecutive s_6/s_7 symbols and
+ *       delete the second of them;
+ *   2 - require an s_8 symbol before the match, then delete the match;
+ *   3 - delete the match.
+ * Returns 0 when nothing matches or a required symbol is missing, else 1. */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_7, 4);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        slice_del(z);
+        z->ket = z->c;
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c;
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z);
+    } else if (among_var == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z);
+    } else if (among_var == 3) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Stem the word held in z.  Region marks are computed first; the rest runs
+ * backwards from the end of the text with z->lb raised to z->I[0], so that
+ * nothing before that mark can be matched or removed.  Returns 0 if the
+ * cursor lies before z->I[0] when the limit is set, 1 otherwise. */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3; /* holds the previous z->lb while the limit is in force */
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        /* main ending removal: a perfective gerund, or else an optional
+         * reflexive ending followed by adjectival / verb / noun in that order */
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            z->c = z->l - m; /* the cursor returns to the end whatever happened */
+        }
+        /* optionally drop one trailing s_9 symbol */
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3; /* lift the limit again */
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Allocate an environment for the Russian stemmer by delegating to
+ * SN_create_env(0, 2, 0).  NOTE(review): the three counts presumably size the
+ * string/integer/boolean variable arrays (this file uses I[0] and I[1]) --
+ * confirm against api.c. */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Dispose of an environment by handing it to SN_close_env. */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Creation and disposal of the environment used by the Russian stemmer. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Entry point of the Russian stemmer; works on the environment z. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>   /* printf, fprintf, stderr */
+#include <stdlib.h>  /* malloc, free, exit */
+#include <string.h>  /* memcmp, memmove */
+
+#include "header.h"
+
+/* unless(C) statement  ==  if (!(C)) statement */
+#define unless(C) if(!(C))
+
+/* Capacity and initial size, in symbols, of a buffer made by create_s(). */
+#define CREATE_SIZE 1
+
+/* Allocate a symbol buffer with room for CREATE_SIZE symbols (plus one spare)
+ * preceded by a HEAD-byte prefix that stores its capacity and size.
+ * Returns a pointer to the first symbol, or NULL if the allocation fails;
+ * previously a failed malloc was offset by HEAD and written through. */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL) return NULL;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Release a buffer obtained from create_s() or increase_size().  The block
+ * handed to free() starts HEAD bytes before p.  A NULL p is ignored, so the
+ * pointer arithmetic is never applied to a null pointer. */
+extern void lose_s(symbol * p)
+{   if (p == NULL) return;
+    free((char *) p - HEAD);
+}
+
+/* Succeed (return 1 and step the cursor forward) when the symbol at the
+ * cursor lies in min..max and its bit is set in the bitmap s; otherwise
+ * return 0 and leave the cursor alone. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max) return 0;
+    ch -= min;                      /* index of the symbol within the bitmap */
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward form of in_grouping: tests the symbol just before the cursor and
+ * steps the cursor back on success. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max) return 0;
+    ch -= min;                      /* index of the symbol within the bitmap */
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Succeed (return 1 and step the cursor forward) when the symbol at the
+ * cursor is NOT a member of the bitmap s over min..max; return 0 when it is
+ * a member or when the cursor is already at z->l. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max) {
+        ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward form of out_grouping: tests the symbol just before the cursor and
+ * steps the cursor back on success. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max) {
+        ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Return 1 and advance the cursor when the symbol at the cursor lies in
+ * min..max (inclusive); otherwise return 0. */
+extern int in_range(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch < min || ch > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward form of in_range: tests the symbol just before the cursor and
+ * moves the cursor back on success. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch < min || ch > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Return 1 and advance the cursor when the symbol at the cursor lies
+ * outside min..max; return 0 when it is inside or the cursor is at z->l. */
+extern int out_range(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (min <= ch && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward form of out_range: tests the symbol just before the cursor and
+ * moves the cursor back on success. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (min <= ch && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Compare the s_size symbols at s with the text starting at the cursor.
+ * On a match the cursor moves past them and 1 is returned; otherwise 0. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{   if (z->l - z->c < s_size) return 0;     /* not enough text left */
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Compare the s_size symbols at s with the text ending at the cursor.
+ * On a match the cursor moves back over them and 1 is returned; otherwise 0. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{   if (z->c - z->lb < s_size) return 0;    /* not enough text before the cursor */
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* eq_s for a sized buffer: the length is read from the buffer's prefix. */
+extern int eq_v(struct SN_env * z, symbol * p)
+{   int n = SIZE(p);
+    return eq_s(z, n, p);
+}
+
+/* eq_s_b for a sized buffer: the length is read from the buffer's prefix. */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{   int n = SIZE(p);
+    return eq_s_b(z, n, p);
+}
+
+/* Look up the text starting at the cursor in the table v of v_size rows.
+ *
+ * Phase 1 is a binary search over rows i..j; common_i and common_j record
+ * how many leading symbols are already known to agree with the rows at
+ * either end, so comparisons resume from the smaller of the two.
+ * NOTE(review): this presumes the rows are ordered by their strings --
+ * confirm against the table generator.
+ *
+ * Phase 2 starts from row i and follows substring_i links.  The first row
+ * that was matched in full, and whose function (if any) returns non-zero,
+ * wins: the cursor is set just past the match and the row's result is
+ * returned.  Returns 0 once the chain ends (substring_i < 0). */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        /* note: this inner i shadows the search bound i declared above */
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c + w->s_size; /* reset the cursor after the call */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/* Look up the text ending at the cursor in the table v of v_size rows.
+ * Strings are compared from their last symbol towards their first, and the
+ * comparison stops at z->lb.  On success the cursor is moved back to the
+ * start of the match and the chosen row's result is returned; 0 is returned
+ * once the substring_i chain ends. */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1; /* last symbol before the cursor */
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        /* note: this inner i shadows the search bound i declared above */
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c - w->s_size; /* reset the cursor after the call */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Replace the buffer p by a new one with capacity n + 20 symbols, copying
+ * CAPACITY(p) symbols across and releasing p.  Returns the new buffer.
+ * The size field of the new buffer is not set here; replace_s, slice_to and
+ * assign_to each call SET_SIZE afterwards.
+ * NOTE(review): the malloc result is not checked -- a failed allocation is
+ * written through.  Fixing that needs the callers to handle a NULL return. */
+extern symbol * increase_size(symbol * p, int n)
+{   int capacity = n + 20;
+    char * raw = (char *) malloc(HEAD + (capacity + 1) * sizeof(symbol));
+    symbol * grown = (symbol *) (HEAD + raw);
+    CAPACITY(grown) = capacity;
+    memmove(grown, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return grown;
+}
+
+/* Substitute the s_size symbols at s for the symbols of z->p lying between
+ * c_bra and c_ket.  The buffer is grown when needed, the tail of the text is
+ * shifted, and z->l and the cursor are adjusted to match.
+ * Returns the change in text length (new length minus old length). */
+
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{   int old_len = SIZE(z->p);
+    int delta = s_size - (c_ket - c_bra);
+    if (delta != 0)
+    {   int new_len = delta + old_len;
+        if (new_len > CAPACITY(z->p)) z->p = increase_size(z->p, new_len);
+        /* slide everything from c_ket onwards to its new position */
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        if (z->c >= c_ket)
+            z->c += delta;          /* cursor was at or after the range: shift it */
+        else if (z->c > c_bra)
+            z->c = c_bra;           /* cursor was inside the range: snap to its start */
+    }
+    if (s_size != 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/* Verify 0 <= bra <= ket <= l <= SIZE(p).  If the chain is broken, report
+ * on stderr, dump the environment with debug(), and terminate with exit(1). */
+static void slice_check(struct SN_env * z)
+{
+    if (z->bra < 0 ||
+        z->bra > z->ket ||
+        z->ket > z->l ||
+        z->l > SIZE(z->p))      /* this last test could be removed */
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+/* Overwrite the current slice (z->bra .. z->ket) with the s_size symbols at
+ * s, after validating the slice bounds. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Overwrite the current slice with the contents of the sized buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{   int n = SIZE(p);
+    slice_check(z);
+    replace_s(z, z->bra, z->ket, n, p);
+}
+
+/* Delete the current slice, i.e. replace it with zero symbols. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_check(z);
+    replace_s(z, z->bra, z->ket, 0, 0);
+}
+
+/* Replace the range bra .. ket with the s_size symbols at s, then shift
+ * z->bra and z->ket by the length change if they lie at or after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{   int delta = replace_s(z, bra, ket, s_size, s);
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* insert_s for a sized buffer: the length is read from the prefix of p. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice into the buffer p, growing p when its capacity is
+ * too small, and record the new size.  Returns the (possibly new) buffer. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{   int n;
+    slice_check(z);
+    n = z->ket - z->bra;
+    if (n > CAPACITY(p)) p = increase_size(p, n);
+    memmove(p, z->p + z->bra, n * sizeof(symbol));
+    SET_SIZE(p, n);
+    return p;
+}
+
+/* Copy the first z->l symbols of the text into the buffer p, growing p when
+ * its capacity is too small.  Returns the (possibly new) buffer. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{   int n = z->l;
+    if (n > CAPACITY(p)) p = increase_size(p, n);
+    memmove(p, z->p, n * sizeof(symbol));
+    SET_SIZE(p, n);
+    return p;
+}
+
+/* Print the text of z on standard output, with markers at the positions of
+ * lb '{', bra '[', the cursor '|', ket ']' and l '}'.  Zero symbols are shown
+ * as '#'.  A heading with number, line_count and the buffer size is printed
+ * only when number is non-negative. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{   int limit = SIZE(z->p);
+    int pos;
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+    for (pos = 0; pos <= limit; pos++)
+    {   if (z->lb == pos) printf("{");
+        if (z->bra == pos) printf("[");
+        if (z->c == pos) printf("|");
+        if (z->ket == pos) printf("]");
+        if (z->l == pos) printf("}");
+        if (pos < limit)
+        {   int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Convert the NUL-terminated string str to lower case in place, byte by
+ * byte with tolower(), and return str itself.
+ */
+char*
+lowerstr(char *str) {
+   unsigned char *cur;
+
+   for(cur=(unsigned char*)str; *cur; cur++)
+       *cur = tolower(*cur);
+   return str;
+}
+
+/*
+ * Release a stop-word list: free the strings stored in s->stop, then the
+ * pointer array itself, and finally zero the whole StopList structure
+ * (which also clears s->wordop).
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       /* check the counter first so the array is never read past its end */
+       while( s->len >0 && *ptr ) {
+           free(*ptr);
+           ptr++; s->len--;
+       }
+       /* the array is freed exactly once, after its entries, even when empty */
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Load a stop-word list from the file named by the text value "in".
+ *
+ * The file is read line by line; empty lines are skipped.  Every word is
+ * strdup()ed and, when s->wordop is set, passed through it before being
+ * stored.  On return s->stop points to the malloc()ed array of words and
+ * s->len holds their count; when "in" is NULL or empty the list is left
+ * empty (s->stop == NULL, s->len == 0).
+ *
+ * Raises ERROR when the file cannot be opened or memory runs out.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t buflen=strlen(buf);
+
+           /*
+            * Strip the line terminator only when it is really there: the
+            * last line of a file may lack one, and fgets() returns over-long
+            * lines in pieces that do not end in '\n'.
+            */
+           if ( buflen>0 && buf[buflen-1]=='\n' )
+               buf[buflen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           /* grow the pointer array geometrically, starting at 16 slots */
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /*
+                    * NOTE(review): s->stop has not been pointed at "stop"
+                    * yet, so this call does not release the words read so
+                    * far -- confirm whether that is acceptable here.
+                    */
+                   freestoplist(s);
+                   fclose(hin); 
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+    
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               freestoplist(s);
+               fclose(hin); 
+               elog(ERROR,"Not enough memory");
+           }
+           /* optional per-word transformation supplied by the caller */
+           if ( s->wordop ) 
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++; 
+       }
+       fclose(hin);
+       pfree(filename); 
+   }
+   s->stop=stop;
+} 
+
+/*
+ * qsort()/bsearch() comparator for arrays of char*: a and b each point
+ * to a string pointer, and the strings are ordered with strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   char *left = *(char**)a;
+   char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/* Sort the words of s with comparestr(); an empty list is left alone. */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is found in the stop list s.  The key is first
+ * passed through s->wordop when that is set, then looked up with
+ * bsearch() using comparestr().
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop )
+       key=(*(s->wordop))(key);
+   if ( !s->stop || s->len<=0 )
+       return false;
+   if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
+       return true;
+   return false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the tsearch configuration whose pg_ts_cfg oid is id.
+ *
+ * Two SPI queries are run: the first fetches the parser name of the
+ * configuration, the second the token-type -> dictionary-names map.
+ * cfg->map is malloc()ed and indexed by token type id; because the map
+ * query is ordered by tokid descending, the first row fixes cfg->len
+ * (highest token id + 1).  Parser and dictionary names are turned into
+ * ids with name2id_prs() / name2id_dict() only after SPI_finish().
+ *
+ * Errors are reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       /* copy made under TopMemoryContext; it is pfree()d after SPI_finish() below */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second query: $1 is the parser name (text), $2 is still the cfg oid */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /* first row carries the largest token id, so it sizes the map */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull refers to the dict_name column fetched last */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* store copies of the dictionary names for now; resolved to ids below */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       /* free the detoasted copy, if detoasting produced one */
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every saved dictionary name by the dictionary's id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Backend-local cache of loaded configurations, used by findcfg(),
+ * name2id_cfg() and reset_cfg().
+ */
+typedef struct {
+   /* entry returned by the most recent findcfg() call; checked first */
+   TSCfgInfo   *last_cfg;
+   /* number of entries of list that are in use */
+   int     len;
+   /* number of entries list has room for */
+   int     reallen;
+   /* realloc()ed array of configurations, kept sorted by id with qsort() */
+   TSCfgInfo   *list;
+   /* cache of configuration name -> oid lookups */
+   SNMap       name2id_map;
+} CFGList;
+
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name -> id map, every
+ * per-token dictionary list, every map array and the list itself.
+ * CList is zeroed afterwards so the cache is rebuilt on next use.
+ */
+void
+reset_cfg(void) {
+        freeSNMap( &(CList.name2id_map) );
+        if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+                free(CList.list);
+   }
+        memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is
+ * unsigned, so a difference converted to int has the wrong sign once two
+ * ids are more than 2^31 apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((TSCfgInfo*)a)->id;
+   Oid idb = ((TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached TSCfgInfo for configuration id, loading it with
+ * init_cfg() on first use.  The cache array is kept sorted by id so that
+ * later lookups can use bsearch().
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo *slot;
+
+   /* same configuration as last time */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* look it up among the configurations loaded so far */
+   if ( CList.len != 0 ) {
+       TSCfgInfo probe;
+
+       probe.id=id;
+       CList.last_cfg = bsearch(&probe, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* not cached: make room (doubling, starting at 16) and load it */
+   if ( CList.len==CList.reallen ) {
+       int newsize = ( CList.reallen ) ? 2*CList.reallen : 16;
+       TSCfgInfo *grown = (TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*newsize);
+
+       if ( !grown )
+           ts_error(ERROR,"No memory");
+       CList.reallen=newsize;
+       CList.list=grown;
+   }
+   slot=&(CList.list[CList.len]);
+   CList.last_cfg=slot;
+   init_cfg(id, slot);
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+
+   /* sorting may have moved the new entry, so look it up again */
+   return findcfg(id);
+}
+
+
+/*
+ * Translate a configuration name into the oid of its pg_ts_cfg row.
+ *
+ * The name -> id cache in CList.name2id_map is consulted first; on a miss
+ * the oid is fetched through SPI and added to the cache.  Raises ERROR
+ * when no row matches or the oid comes back NULL.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   Oid id=findSNMap_t( &(CList.name2id_map), name );
+   
+   /* a non-zero id means the name was found in the cache */
+   if ( id ) 
+       return id;
+   
+   SPI_connect();
+   /* the plan is prepared once and kept in a static for later calls */
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull ) 
+           elog(ERROR, "Null id for tsearch config");
+   } else 
+       elog(ERROR, "No tsearch config");
+   SPI_finish();
+   addSNMap_t( &(CList.name2id_map), name, id );
+   return id;
+}
+
+
+/*
+ * Split buf (buflen bytes) into tokens with the parser of cfg and append
+ * the normalized lexemes to prs.
+ *
+ * For every token the dictionaries mapped to its type are tried in order;
+ * the first one that returns a non-NULL result decides.  prs->pos is
+ * advanced once per such token, and every lexeme in the result is stored
+ * with that position.  The strings returned by the dictionary are kept in
+ * prs->words; only the array holding them is freed here.  Token types
+ * outside cfg->map are skipped.
+ *
+ * Raises ERROR when a token is MAXSTRLEN bytes or longer.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser reports a token type of 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an int, so it is passed with Int32GetDatum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           /* norms is a NULL-terminated array; an empty one adds no words */
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token to the headline word list: the list is doubled until
+ * there is room, the new entry is zeroed, and buflen bytes of buf are
+ * copied into freshly palloc()ed storage (no terminating NUL is added).
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* take the address only after any reallocation */
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;
+}
+
+/*
+ * Link the most recently added headline word to the query items whose
+ * operand equals the lexeme buf (buflen bytes).
+ *
+ * The first matching item is stored in the word itself.  For every
+ * further match a copy of the word is appended with repeated=1 and that
+ * item; the copy shares the original's word pointer.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* reserve room for the worst case of one copy per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* take the address only now: repalloc() above may have moved the array */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Split buf (buflen bytes) into tokens with the parser of cfg and build
+ * the headline word list in prs.
+ *
+ * Every token is appended verbatim with hladdword().  It is then
+ * normalized by the first dictionary of its token type that returns a
+ * non-NULL result, and each resulting lexeme is matched against the query
+ * with hlfinditem().  The lexemes and the array holding them are freed
+ * here.
+ *
+ * Raises ERROR when a token is MAXSTRLEN bytes or longer.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser reports a token type of 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       /* token types outside the map have no dictionaries to consult */
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an int, so it is passed with Int32GetDatum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from the word list in prs and return it as a
+ * newly palloc()ed text value.
+ *
+ * Only words flagged "in" and neither "skip" nor "repeated" are emitted:
+ * a word flagged "replace" becomes a single space, any other word is
+ * copied as is, wrapped in prs->startsel / prs->stopsel when flagged
+ * "selected".  The word strings are pfree()d along the way.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   text *out;
+   int len=128;
+   char *ptr;
+   HLWORD  *wrd=prs->words;
+
+   out = (text*)palloc( len );
+   ptr=((char*)out) + VARHDRSZ;
+
+   while( wrd - prs->words < prs->curwords ) {
+       /* make sure the word plus both selection markers fit, doubling as needed */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
+           int dist = ptr - ((char*)out);
+           len*= 2;
+           out = (text *) repalloc(out, len);
+           ptr=((char*)out) + dist;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace ) {
+               *ptr=' ';
+               ptr++;
+           } else {
+               if (wrd->selected) {
+                   memcpy(ptr,prs->startsel,prs->startsellen);
+                   ptr+=prs->startsellen;
+               }
+               memcpy(ptr,wrd->word,wrd->len);
+               ptr+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(ptr,prs->stopsel,prs->stopsellen);
+                   ptr+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries share their word pointer with the original entry */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+
+       wrd++;
+   }
+
+   /* the stored size is the distance from the start of out, header included */
+   VARATT_SIZEP(out)=ptr - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the id of the current configuration.
+ *
+ * When none has been chosen yet (current_cfg_id is 0) the configuration
+ * is looked up in pg_ts_cfg by the current LC_CTYPE locale name and the
+ * result is remembered in current_cfg_id.  Raises ERROR when no
+ * configuration matches the locale.
+ *
+ * NOTE(review): the value is an Oid but is returned as int -- confirm
+ * callers do not depend on the signedness.
+ */
+int  
+get_currcfg(void) {
+   Oid arg[1]={ TEXTOID };
+   const char *curlocale;
+   Datum pars[1];
+   bool isnull;
+   int stat;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   /* the plan is prepared once and kept in a static for later calls */
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* a NULL second argument queries the locale without changing it */
+   curlocale = setlocale(LC_CTYPE, NULL);
+   pars[0] = PointerGetDatum( char2text((char*)curlocale) );
+   stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1);
+
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 )
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   pfree(DatumGetPointer(pars[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current
+ * one.  findcfg() is called first, so the configuration is loaded into
+ * the cache before current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+        Oid cfgid=PG_GETARG_OID(0);
+
+        findcfg(cfgid);
+        current_cfg_id=cfgid;
+        PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current
+ * one, by resolving the name with name2id_cfg() and delegating to
+ * set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+        text *cfgname=PG_GETARG_TEXT_P(0);
+        Oid cfgid=name2id_cfg(cfgname);
+
+        DirectFunctionCall1( set_curcfg, ObjectIdGetDatum(cfgid) );
+        PG_FREE_IF_COPY(cfgname, 0);
+        PG_RETURN_VOID();
+}
+
+/* SQL-callable: return the id of the current configuration, as given by get_currcfg(). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   int cur=get_currcfg();
+
+   PG_RETURN_OID( cur );
+}
+
+/*
+ * SQL-callable: emit the notice "TSearch cache cleaned" through
+ * ts_error() and return void.
+ *
+ * NOTE(review): no cache is visibly cleared in this function; presumably
+ * ts_error() performs the reset (reset_cfg() etc.) itself -- confirm
+ * against its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* List of dictionaries mapped to one token type. */
+typedef struct {
+   /* number of entries in dict_id */
+   int len;
+   /* malloc()ed array; after init_cfg() each entry holds a dictionary id */
+   Datum   *dict_id;
+} ListDictionary;
+
+/* One loaded tsearch configuration, as filled in by init_cfg(). */
+typedef struct {
+   /* oid of the configuration's pg_ts_cfg row */
+   Oid id;
+   /* id of the configuration's parser, as returned by name2id_prs() */
+   Oid prs_id;
+   /* number of entries in map (highest mapped token type id + 1) */
+   int len;
+   /* malloc()ed array indexed by token type id */
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalized lexeme produced by parsetext_v2(). */
+typedef struct {
+        /* length of word in bytes */
+        uint16          len;
+   /* parsetext_v2() stores a single position in pos.pos */
+   /* NOTE(review): apos presumably holds a position array later on -- confirm */
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        /* lexeme text as returned by the dictionary */
+        char       *word;
+   /* set to 0 by parsetext_v2(); presumably the allocated size of apos -- confirm */
+   uint32  alen;
+}       WORD;
+   
+/* Growable list of lexemes filled in by parsetext_v2(). */
+typedef struct {
+        /* palloc()ed array, doubled with repalloc() when full */
+        WORD       *words;
+        /* number of entries words has room for */
+        int4            lenwords;
+        /* number of entries of words in use */
+        int4            curwords;
+   /* running position counter, advanced once per normalized token */
+   int4        pos;
+}       PRSTEXT;
+
+/* One token of the text a headline is generated from. */
+typedef struct {
+        /* length of word in bytes */
+        uint16    len;
+   /*
+    * Flags read by genhl(): a word is emitted when "in" is set and "skip"
+    * and "repeated" are not; "replace" emits a single space instead of the
+    * word; "selected" wraps it in the start/stop markers.  "repeated"
+    * marks the extra copies made by hlfinditem().
+    */
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   /* token type reported by the parser */
+   uint8   type;
+        /* token text, len bytes, not NUL-terminated */
+        char      *word;
+   /* query item matching this token, or NULL when there is none */
+   ITEM      *item;
+}       HLWORD;
+   
+/* Growable token list plus the selection markers used to build a headline. */
+typedef struct {
+        /* palloc()ed array, doubled with repalloc() when full */
+        HLWORD       *words;
+        /* number of entries words has room for */
+        int4            lenwords;
+        /* number of entries of words in use */
+        int4            curwords;
+        /* text copied before a selected word, startsellen bytes */
+        char           *startsel;
+        /* text copied after a selected word, stopsellen bytes */
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+/*
+ * Input function of the tsstat type.  The argument is not examined: the
+ * result is always a header-only tsstat holding zero entries.
+ */
+Datum           
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+/*
+ * Output function of the tsstat type.  Not implemented: it always raises
+ * an ERROR.  The trailing PG_RETURN_NULL() follows the elog(ERROR) call.
+ */
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * Create or grow the array of WordEntry pointers used to collect words.
+ * A missing array (or a zero capacity) yields a fresh 8-slot array; any
+ * other call doubles the capacity.  *len receives the new capacity and the
+ * (possibly relocated) array is returned.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   bool    fresh = ( in==NULL || *len==0 );
+
+   *len = fresh ? 8 : (*len) * 2;
+
+   if ( fresh )
+       return palloc( sizeof(WordEntry*) * (*len) );
+   return repalloc( in, sizeof(WordEntry*) * (*len) );
+}
+
+/*
+ * Order a statistics entry against a tsvector entry: shorter words sort
+ * first, and words of equal length are ordered by strncmp() on their bytes.
+ * Returns <0, 0 or >0.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp( STATSTRPTR(stat) + a->pos, STRPTR(txt) + b->pos, a->len );
+}
+
+/*
+ * Build a new tsstat holding every entry of 'stat' plus the 'len' words of
+ * 'txt' referenced by entry[0..len-1].
+ *
+ * The words in 'entry' are expected to be absent from 'stat' and to be in
+ * compareStatWord() order (ts_accum collects them that way).  The result is
+ * freshly palloc'ed; 'stat' is left untouched.  String data of the old
+ * entries is copied first, so their 'pos' offsets stay valid, and the new
+ * words are appended behind it.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* total length of the strings to be added */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings keep their offsets; new ones go after them */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary-search its slot and copy both halves */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries (possibly nothing) */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /*
+        * Whatever is left of the new words.  At most one of the two tails
+        * is non-empty, so nptr is already correct whenever this loop runs.
+        */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+/*
+ * Transition step: fold one tsvector (argument 1) into the running
+ * statistics (argument 0) and return the updated tsstat.
+ *
+ * Words already present get ndoc incremented and nentry increased by their
+ * number of positions (at least 1); this is done in place in 'stat'.  Words
+ * not yet present are collected and added through formstat(), which
+ * returns a new palloc'ed tsstat.  A NULL or empty tsvector leaves the
+ * statistics unchanged.
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = NULL;
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   /* detoast only a real value: a NULL argument carries no usable pointer */
+   if ( !PG_ARGISNULL(1) )
+       txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || txt->size==0 ) {
+       if ( txt!=NULL ) {
+           PG_FREE_IF_COPY(txt,1);
+       }
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   if ( stat->size < 100*txt->size ) { /* merge */
+       /* walk both ordered arrays in parallel */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* remaining words sort after every known one: all of them are new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       /* statistics are much larger than the vector: binary-search each word */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions in this file:
+ * 'stat' is a private copy of the statistics being reported and 'cur' is
+ * the index of the next StatEntry to emit.
+ */
+typedef struct {
+   uint32  cur;
+   tsstat  *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions: inside the
+ * multi-call memory context, store a private copy of 'stat' together with a
+ * zeroed cursor in funcctx->user_fctx, and prepare the slot and input
+ * metadata for the "statinfo" row type.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext   callerctx;
+   StatStorage     *state;
+   TupleDesc       desc;
+
+   callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = palloc( sizeof(StatStorage) );
+   state->cur = 0;
+   state->stat = palloc( stat->len );
+   memcpy(state->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)state;
+
+   desc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(callerctx);
+}
+
+
+/*
+ * Produce the next result row of a statistics scan.
+ *
+ * While entries remain, builds a (word, ndoc, nentry) tuple from the entry
+ * at the cursor, advances the cursor and returns the tuple as a Datum.
+ * Once the entries are exhausted, frees the stored state and returns
+ * (Datum)0, which callers treat as "done".
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *state = (StatStorage*)funcctx->user_fctx;
+   StatEntry   *item;
+   HeapTuple   tup;
+   Datum       row;
+   char        *cols[3];
+   char        ndocbuf[16];
+   char        nentrybuf[16];
+
+   if ( !( state->cur < state->stat->size ) ) {
+       pfree(state->stat);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   item = STATPTR(state->stat) + state->cur;
+
+   /* column 1: the word, copied out as a NUL-terminated string */
+   cols[0] = palloc( item->len+1 );
+   memcpy( cols[0], STATSTRPTR(state->stat)+item->pos, item->len);
+   cols[0][item->len] = '\0';
+
+   /* columns 2 and 3: the counters in text form */
+   sprintf(ndocbuf,"%d",item->ndoc);
+   cols[1] = ndocbuf;
+   sprintf(nentrybuf,"%d",item->nentry);
+   cols[2] = nentrybuf;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, cols);
+   row = TupleGetDatum(funcctx->slot, tup);
+
+   pfree(cols[0]);
+   state->cur++;
+   return row;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function over a tsstat value (argument 0): on the first
+ * call the statistics are copied into per-query state, then each call
+ * returns one row from ts_process_call() until it reports (Datum)0.
+ */
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* (Datum)0 from ts_process_call() means there are no more rows */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Oid of the tsvector type; InvalidOid until get_ti_Oid() has looked it up. */
+static Oid tiOid=InvalidOid;
+/*
+ * Look up the Oid of the "tsvector" type through SPI and store it in tiOid.
+ * Must be called while an SPI connection is open.  Raises an ERROR if the
+ * query fails, returns no row, or yields InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is an unsigned row count, so "< 0" can never be true;
+    * require at least one row before touching vals[0].
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the query given in 'txt' through an SPI cursor and accumulate
+ * statistics over its single tsvector column.
+ *
+ * The query must return exactly one column of type tsvector, otherwise an
+ * ERROR is raised.  Rows are fetched 100 at a time; NULL values are
+ * skipped and every other value is folded in with ts_accum().  Returns the
+ * accumulated tsstat (header-only if the query produced no usable rows).
+ * Must be called while an SPI connection is open.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       /* fold every row of the current batch into the statistics */
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum returns either 'stat' itself or a new copy */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function taking a query string (argument 0).  On the first
+ * call the query is run via SPI and its statistics are computed by
+ * ts_stat_sql(); each call then returns one row from ts_process_call()
+ * until it reports (Datum)0.
+ */
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* NOTE(review): the return value of SPI_connect() is not checked */
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       /* copy the result into multi-call memory before SPI_finish() */
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* (Datum)0 from ts_process_call() means there are no more rows */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One word of the statistics.  As filled in by ts_stat.c: 'len' is the
+ * word's length in bytes, 'pos' its offset from the start of the string
+ * area (STATSTRPTR), 'ndoc' is incremented once per tsvector containing the
+ * word, and 'nentry' grows by the word's position count in that tsvector
+ * (a count of 0 is treated as 1).
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Statistics container.  'len' is the total size of the value in bytes and
+ * 'size' the number of StatEntry records.  Per the accessor macros in this
+ * header, the StatEntry array starts right after these two fields and the
+ * word strings follow the array.
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat: header (len, size), then 'size' StatEntry
+ * records, then the string area.  Arguments are fully parenthesized so that
+ * expressions such as CALCSTATSIZE(a + b, c) expand correctly, and the casts
+ * use tsstat, the type declared in this header.
+ */
+/* size in bytes of the two leading int4 fields */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* total bytes needed for x entries plus lenstr bytes of string data */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* pointer to the first StatEntry of x */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* pointer to the start of the string area of x */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* number of bytes in the string area of x */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary (pg_ts_dict rows are keyed by dict_name)
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of lexeme positions in the string and their lengths
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>     /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending pos.
+ *
+ * Returns 0 when both items carry the same pos, a positive value when a
+ * sorts after b, and a negative value otherwise.  Equal keys must compare
+ * as 0: a comparator that reports "greater" for both (a,b) and (b,a) is
+ * inconsistent, and qsort() behaviour is then undefined.
+ */
+static int 
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l items of a[] with comparePos and squeeze out duplicate
+ * positions in place.
+ *
+ * When several items share the same pos, the largest weight among them is
+ * kept.  Compaction stops early as soon as the kept item's index reaches
+ * MAXNUMPOS-1 or its pos equals MAXENTRYPOS-1.
+ *
+ * Returns the number of items kept at the front of a[].
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *ptr, *res;
+
+   res=a;
+   /* a single item is trivially sorted and unique */
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   /* res = last item kept, ptr = next candidate */
+   ptr = a + 1;
+   while (ptr - a < l) {
+       if ( ptr->pos != res->pos ) {
+           /* new position: append it after the last kept item */
+           res++;
+           res->pos = ptr->pos;
+           res->weight = ptr->weight;
+           if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
+               break;
+       } else if ( ptr->weight > res->weight )
+           /* same position: remember the heavier weight */
+           res->weight = ptr->weight;
+       ptr++;
+   }
+   return res + 1 - a;
+}
+
+/* Lexeme text buffer read by compareentry(); uniqueentry() sets it before sorting. */
+static char *BufferStr;
+/*
+ * qsort() comparator for WordEntryIN items.  Shorter lexemes sort first;
+ * lexemes of equal length are ordered by strncmp() over their bytes, which
+ * live in BufferStr at each entry's pos offset.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntry *lhs = &((WordEntryIN *) a)->entry;
+   const WordEntry *rhs = &((WordEntryIN *) b)->entry;
+
+   if ( lhs->len != rhs->len )
+       return ( lhs->len > rhs->len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[lhs->pos], &BufferStr[rhs->pos], lhs->len);
+}
+
+/*
+ * Sort the l parsed lexeme entries of a[] (by length, then bytes) and merge
+ * duplicates in place.
+ *
+ * a         - entries; entry.pos/entry.len address the lexeme bytes in buf,
+ *             and pos (when entry.haspos is set) is a palloc'd array whose
+ *             slot 0 holds the position count as a uint16, followed by
+ *             that many WordEntryPos items
+ * l         - number of entries; the caller passes l > 0
+ * buf       - buffer holding the lexeme bytes
+ * outbuflen - in/out byte count for the data area of the resulting
+ *             tsvector.  For l == 1 with positions it is overwritten; for
+ *             l == 1 without positions it is left untouched; for l > 1 the
+ *             required bytes are ADDED to the value passed in
+ *
+ * Position lists of duplicate lexemes are concatenated and then
+ * de-duplicated with uniquePos().  Returns the number of unique entries
+ * left at the front of a[].
+ *
+ * The size accounted for a positional entry is (count + 1) slots, because
+ * the caller copies the uint16 count slot together with the positions.
+ * Counting only the positions would under-size the output whenever the
+ * slack in the incoming *outbuflen is smaller than one slot per entry.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   res = a;
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
+       }
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;    /* compareentry() reads lexeme bytes through this */
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   /* res = last unique entry kept, ptr = next candidate */
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* different lexeme: finalize res, then start a new unique entry */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               /* +1 accounts for the count slot stored ahead of the positions */
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* same lexeme again: fold its positions into res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* res had no positions yet: take over the duplicate's array */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+   /* finalize the last unique entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/* States of the gettoken_tsvector() scanner */
+#define WAITWORD   1       /* skipping blanks, waiting for a lexeme to start */
+#define WAITENDWORD 2      /* inside an unquoted lexeme */
+#define WAITNEXTCHAR   3   /* previous char was a backslash: take next char literally */
+#define WAITENDCMPLX   4   /* inside a quoted lexeme, waiting for the closing quote */
+#define WAITPOSINFO    5   /* just after the closing quote: an optional ':' may follow */
+#define INPOSINFO  6       /* after ':' or ',': a position number must start here */
+#define WAITPOSDELIM   7   /* inside/after a position number: digits, weight, ',' or end */
+
+/*
+ * Make sure state->word has room for one more character plus a terminator.
+ * The buffer is doubled when full, and curpos is re-based onto the
+ * (possibly moved) allocation.  Expects a variable named "state" in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Scan the next lexeme (and optional position list) from state->prsbuf.
+ *
+ * On success returns 1: the lexeme is NUL-terminated in state->word with
+ * state->curpos pointing at the terminator, and, if a ":pos,pos..." list was
+ * read, state->alen is non-zero and state->pos is a palloc'd array whose
+ * slot 0 holds the count as a uint16.  Returns 0 when the end of the input
+ * is reached while still waiting for a lexeme.  Malformed input is reported
+ * with elog(ERROR).
+ *
+ * When state->oprisdelim is set, characters accepted by ISOPERATOR() end an
+ * unquoted lexeme, and a ':' or closing quote returns immediately without
+ * reading position info.
+ *
+ * state->prsbuf is advanced as characters are consumed, so the function can
+ * be called repeatedly on the same state.
+ *
+ * NOTE(review): isdigit()/isspace()/tolower() receive a plain char here;
+ * consider casting to unsigned char if input may contain high-bit bytes.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               /* escaped first char: continue as an unquoted lexeme afterwards */
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               /* copy the escaped char verbatim and resume the previous state */
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               /* delimiter: lexeme is complete; prsbuf is left on the delimiter */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               /* ':' ends the lexeme and introduces position info */
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               /* closing quote */
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* step over the quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit(*(state->prsbuf)) ) {
+               /* first digit of a position: make room for one more item */
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi() reads the whole number; remaining digits are skipped in WAITPOSDELIM */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower(*(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               /* weight letters: A (or '*') = 3, B = 2, C = 1, D = 0 */
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower(*(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower(*(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower(*(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace(*(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit(*(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   /* not reached: the loop above only exits through return or elog(ERROR) */
+   return 0;
+}
+
+/*
+ * Input function for the tsvector type: parse the cstring argument into a
+ * freshly palloc'd tsvector.
+ *
+ * Tokens are read with gettoken_tsvector(), their bytes collected in a
+ * temporary buffer, then sorted and de-duplicated by uniqueentry(), and
+ * finally laid out as WordEntry array + lexeme bytes (+ position data).
+ * Raises elog(ERROR) for over-long words or values, and for any syntax
+ * error detected by the tokenizer.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   /* arr: one WordEntryIN per token; tmpbuf: concatenated lexeme bytes */
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       /* grow tmpbuf until the new lexeme fits, re-basing cur afterwards */
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /*
+        * NOTE(review): entry.pos is a 20-bit field, so an offset equal to
+        * MAXSTRPOS itself would not fit; this test looks like it should
+        * be ">=" - confirm.
+        */
+       if (cur - tmpbuf > MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           /* the tokenizer allocated a position array; take ownership of it */
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   /*
+    * buflen still holds the tmpbuf allocation size at this point; it is
+    * passed to uniqueentry() as the in/out data-area size and then used
+    * to size the result.
+    */
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /* lexeme bytes, padded to SHORTALIGN, optionally followed by count + positions */
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * SQL-callable: return the number of entries stored in a tsvector
+ * (its size field) as int4.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before the detoasted copy (if any) is released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function for the tsvector type: render the value as a palloc'd
+ * cstring of the form 'lexeme':pos[W],pos[W] 'lexeme' ...
+ *
+ * Each lexeme is wrapped in single quotes and entries are separated by one
+ * space.  Weights 3, 2 and 1 are printed as A, B and C after the position;
+ * weight 0 prints nothing.
+ *
+ * NOTE(review): only the quote character is backslash-escaped below, while
+ * gettoken_tsvector() treats a backslash inside quotes as an escape, so a
+ * lexeme containing a backslash may not read back identically - confirm.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+       /* upper bound: quotes and separators, doubled lexeme bytes, 7 bytes per position */
+       lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+       for (i = 0; i < out->size; i++) {
+               lenbuf += ptr[i].len*2 /*for escape */;
+               if ( ptr[i].haspos )
+                       lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+       }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'')
+           {
+               /* grow by one byte for the escape and re-base curout */
+               int4        pos = curout - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               /* comma between positions, none after the last one */
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD items: order by lexeme length, then by the
+ * lexeme bytes, then by ascending position.
+ *
+ * Returns 0 only when length, bytes and position are all equal.  Equal keys
+ * must compare as 0; reporting "less" for both (a,b) and (b,a) makes the
+ * comparator inconsistent, and qsort() behaviour is then undefined.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+
+   if (wa->len == wb->len) {
+       int res = strncmp(wa->word, wb->word, wb->len);
+       if ( res==0 ) {
+           /* same lexeme: fall back to the position */
+           if ( wa->pos.pos == wb->pos.pos )
+               return 0;
+           return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+       }
+       return res;
+   }
+   return (wa->len > wb->len) ? 1 : -1;
+}
+
+/*
+ * Sort the l parsed words of a[] with compareWORD and merge duplicate
+ * lexemes in place, turning each word's single position (pos.pos) into a
+ * palloc'd position array (pos.apos) whose slot 0 holds the count.
+ *
+ * For a duplicate lexeme the word string is pfree'd, and its position is
+ * appended to the kept entry unless that entry already holds MAXNUMPOS-1
+ * positions or its last position equals MAXENTRYPOS-1.
+ *
+ * Returns the number of unique words left at the front of a[].
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   if (l == 1) {
+       /* pos is read into tmppos before apos is assigned (pos.pos and pos.apos are members of the same object) */
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   /* convert the first (smallest) word to array form */
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* new lexeme: start the next unique entry with one position */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* duplicate lexeme: drop its string, append its position if allowed */
+           pfree(ptr->word);
+           if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
+               res->pos.apos[0]++; 
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * Builds a palloc'd tsvector from the words collected in prs.  The words
+ * are first sorted and merged with uniqueWORD(); each unique lexeme is then
+ * copied into the data area, followed by its uint16 position count and
+ * WordEntryPos items (all with weight 0).
+ *
+ * Consumes prs: every word string, every position array and prs->words
+ * itself are pfree'd.  The caller must ensure prs->curwords > 0.
+ * Raises elog(ERROR) when the lexeme data area grows past MAXSTRPOS.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   /* first pass: compute the size of the data area */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   /* second pass: fill the WordEntry array and the data area */
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /* NOTE(review): pos is a 20-bit field; ">=" may be intended here - confirm */
+       if (cur - str > MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* position count goes first, then the positions themselves */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * SQL-callable: parse the text in argument 1 with the configuration whose
+ * id is given in argument 0 and return the resulting tsvector.  Text that
+ * yields no words produces an empty tsvector (size 0).
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *document = PG_GETARG_TEXT_P(1);
+   TSCfgInfo  *config = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     parsed;
+   tsvector   *result = NULL;
+
+   /* start with room for 32 words */
+   parsed.lenwords = 32;
+   parsed.curwords = 0;
+   parsed.pos = 0;
+   parsed.words = (WORD *) palloc(sizeof(WORD) * parsed.lenwords);
+
+   parsetext_v2(config, &parsed, VARDATA(document), VARSIZE(document) - VARHDRSZ);
+   PG_FREE_IF_COPY(document, 1);
+
+   if (parsed.curwords == 0) {
+       /* nothing to index: hand back a header-only tsvector */
+       pfree(parsed.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   } else
+       result = makevalue(&parsed);
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * SQL-callable: like to_tsvector(), but the configuration is given by name
+ * (text, argument 0); the name is resolved with name2id_cfg().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text   *cfgname = PG_GETARG_TEXT_P(0);
+   Datum   cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum   vector;
+
+   vector = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(vector);
+}
+
+/*
+ * SQL-callable: like to_tsvector(), using the configuration id returned by
+ * get_currcfg(); the text to parse is argument 0.
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum   cfgid = Int32GetDatum( get_currcfg() );
+   Datum   vector;
+
+   vector = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+   PG_RETURN_DATUM(vector);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * TEXTOID.  Returns the oid of the first such candidate, or InvalidOid if
+ * there is none.  The candidate list is pfree'd node by node.
+ */
+static Oid
+findFunc(char *fname) {
+   FuncCandidateList cand, next;
+   Oid found = InvalidOid;
+   List *qualname = makeList1(makeString(fname));
+
+   cand = FuncnameGetCandidates(qualname, 1);
+   freeList(qualname);
+
+   for (; cand != NULL; cand = next) {
+       /* remember the successor before the node is released */
+       next = cand->next;
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * Row-level BEFORE INSERT/UPDATE trigger that fills a tsvector column from
+ * one or more character columns, using the current configuration.
+ *
+ * Trigger arguments: tgargs[0] names the tsvector column; every further
+ * argument names either a text/varchar/char column to index, or - when no
+ * column of that name exists - a function taking text that is looked up
+ * with findFunc().
+ *
+ * Returns the modified tuple.  Raises elog(ERROR) when not called as a
+ * trigger, for statement-level or AFTER triggers, for unknown events, for
+ * fewer than two arguments, and for unknown column/function names.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /*
+            * Not a column: treat the argument as a function name.  funcoid
+            * is never reset, so the function found here is applied to
+            * every column argument that follows.
+            */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /* detoast made no copy: alias txt_toasted so the pfree below is skipped */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when detoasting produced a separate copy */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store an empty (header-only) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Descriptor of one lexeme inside a tsvector, packed into 32 bits:
+ * haspos tells whether position data follows the lexeme bytes, len is the
+ * lexeme length in bytes and pos its byte offset from STRPTR().
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* one past the largest value that fits in the len / pos bit fields */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme, packed into 16 bits: a 2-bit weight and a
+ * 14-bit position.
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+/* one past the largest position that fits in the 14-bit pos field */
+#define MAXENTRYPOS    (1<<14)
+/* limit on the number of positions kept per lexeme */
+#define MAXNUMPOS  256
+/* clamp a position to the largest storable value, MAXENTRYPOS-1 */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * The tsvector value itself: a two-int4 header followed by `size`
+ * WordEntry descriptors and then the lexeme/position data area.
+ */
+typedef struct
+{
+   int4        len;        /* total size of the value in bytes */
+   int4        size;       /* number of WordEntry items */
+   char        data[1];    /* WordEntry array, then the data area */
+}  tsvector;
+
+/*
+ * Layout helpers for tsvector.  Every macro argument is parenthesized so
+ * that expression arguments (e.g. "cur - data", "a + 1") expand safely.
+ */
+/* size of the len + size header */
+#define DATAHDRSIZE (sizeof(int4)*2)
+/* total bytes for x entries plus lenstr bytes of data area */
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+/* start of the WordEntry array */
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+/* start of the data area (right after the WordEntry array) */
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* size of the data area in bytes */
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* address of entry e's uint16 position count (just past its aligned lexeme bytes) */
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+/* number of positions stored for entry e, 0 when it has none */
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+/* first WordEntryPos of entry e (skips the uint16 count) */
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Work item used while parsing tsvector input: the entry descriptor plus a
+ * separately allocated position array whose slot 0 is used as a uint16
+ * count of the positions that follow.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/* Scanner state for gettoken_tsvector() */
+typedef struct
+{
+   char       *prsbuf;     /* current read position in the input string */
+   char       *word;       /* buffer receiving the current lexeme */
+   char       *curpos;     /* write position inside word */
+   int4        len;        /* allocated size of word */
+   int4        state;      /* current scanner state */
+   int4        alen;       /* allocated items in pos; 0 when no positions were read */
+   WordEntryPos    *pos;   /* position array of the current lexeme (slot 0 = count) */
+   bool        oprisdelim; /* treat operator characters as lexeme delimiters */
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>     /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * SQL-callable: return a copy of the tsvector in argument 0 with all
+ * position information removed.  The result keeps every lexeme, in the
+ * same order, with haspos cleared.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* data area needs only the (aligned) lexeme bytes */
+   for(i=0;i<in->size;i++) 
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * SQL-callable: return a copy of the tsvector in argument 0 in which every
+ * stored position carries the weight named by the char in argument 1
+ * ('a'..'d', case-insensitive, mapped to 3..0).  Any other character
+ * raises elog(ERROR).
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int entries_left,pos_left;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+   int letter = tolower(cw);
+
+   if ( letter == 'a' )
+       w=3;
+   else if ( letter == 'b' )
+       w=2;
+   else if ( letter == 'c' )
+       w=1;
+   else if ( letter == 'd' )
+       w=0;
+   else
+       elog(ERROR,"Unknown weight");
+
+   /* work on a private copy of the whole value */
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+
+   entry=ARRPTR(out);
+   for ( entries_left=out->size; entries_left != 0; entries_left--, entry++ ) {
+       pos_left=POSDATALEN(out,entry);
+       if ( pos_left == 0 )
+           continue;       /* entry has no positions */
+       for ( p=POSDATAPTR(out,entry); pos_left != 0; pos_left--, p++ )
+           p->weight=w;
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Three-way comparison of two lexemes that may live in different
+ * tsvectors: ptra/ptrb are the data areas that a->pos/b->pos index into.
+ * Shorter lexemes sort first; equal lengths are decided by strncmp().
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position list of
+ * destptr (an entry of dest), shifting each appended position by maxpos.
+ *
+ * If destptr has no positions yet, its count is initialised to 0 first.
+ * Copying stops when the source is exhausted, when the destination holds
+ * MAXNUMPOS positions, or when the last stored position is already the
+ * largest representable one (MAXENTRYPOS-1).  Shifted positions are
+ * clamped with LIMITPOS().
+ *
+ * Sets destptr->haspos when at least one position was added and returns
+ * the number of positions added.  The caller must have reserved room in
+ * dest's data area.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+
+/*
+ * SQL-callable: concatenate two tsvectors (arguments 0 and 1).
+ *
+ * The entry arrays of both inputs are merged in compareEntry() order.
+ * Positions taken from the first input are copied unchanged; positions
+ * taken from the second input are shifted by the largest position found
+ * in the first input (see add_pos()).  A lexeme present in both inputs
+ * yields one entry with both position lists.
+ *
+ * The result is allocated with the summed size of both inputs and its
+ * len/size fields are trimmed to what was actually used.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* upper bound: no merged entry can need more than both inputs together */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   /* provisional size: the data area is placed as if no entries were merged */
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   /* merge while both inputs still have entries */
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* copy count + positions verbatim */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* shifted copy; drop haspos again if nothing could be added */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one entry, combined positions */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* count slot already written above, so only positions are added here */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* leftovers of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* leftovers of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /* fix up the real entry count and total length */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   /* fewer entries than provisioned: slide the data area down behind the array */
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- WARNING: this is the uninstall script for tsearch2.
+-- It drops all indices, triggers and columns that use the types defined
+-- in tsearch2.sql.  Everything below runs between BEGIN and END.
+
+
+-- GiST operator class for tsvector
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators on tsvector / tsquery
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregates
+DROP AGGREGATE stat(tsvector);
+
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types; CASCADE also removes the objects that depend on them
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- functions dropped explicitly (none of these take one of the types above)
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of each token type, indexed by the token
+ * type number (LATWORD, CYRWORD, ... in deflex.h).  Slot 0 is an empty
+ * placeholder because token type numbers start at 1.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of each token type, indexed by the token type number from
+ * deflex.h.  Slot 0 is an empty placeholder because token type numbers
+ * start at 1.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Remember: LASTNUM must stay equal to the highest token type number
+ * defined below; update it whenever a token type is added.
+ */
+#define LASTNUM        23
+
+/*
+ * Token type numbers.  They are used as indexes into lex_descr[] and
+ * tok_alias[], so the order of those arrays must match this list.
+ */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* per-token-type description and alias tables */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Interface to the flex-generated word parser (parser.l).
+ *
+ * NOTE(review): token and tokenlen are declared here without "extern", so
+ * every file including this header gets its own tentative definition.
+ * Linkers that reject common symbols will report duplicates -- consider
+ * "extern" here plus one definition in parser.l.
+ */
+char      *token;
+int            tokenlen;
+/* scanner entry point; a return value of 0 ends the caller's token loop */
+int            tsearch2_yylex(void);
+/* start scanning a memory buffer of the given length */
+void       start_parse_str(char *, int);
+/* start scanning a file handle; the int limits the bytes read, 0 means unlimited */
+void       start_parse_fh(FILE *, int);
+/* release the scan buffer and the saved hyphenated-word copy */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: three-argument fprintf() calls in
+ * the generated scanner are redirected to ts_error(ERROR, ...).
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* use the postgres allocation functions inside the generated scanner */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the current token */
+char *s     = NULL;  /* copy of a WHOLE hyphenated word or URL, returned before its parts */
+
+YY_BUFFER_STATE buf = NULL; /* current scan buffer; released by end_parse() */
+
+int lrlimit = -1;  /* bytes still allowed to be read from a filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next fread() request in YY_INPUT */
+
+/*
+ * Replacement for flex's YY_INPUT that can stop after a limited number of
+ * bytes.  Interactive buffers are read one character at a time up to and
+ * including a newline.  Otherwise lrlimit == 0 reports end of input, a
+ * positive lrlimit caps (and is reduced by) each fread() request, and a
+ * negative lrlimit reads max_size bytes without any cap.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+       int c = '*', n; \
+       for ( n = 0; n < max_size && \
+               (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+           buf[n] = (char) c; \
+       if ( c == '\n' ) \
+           buf[n++] = (char) c; \
+       if ( c == EOF && ferror( tsearch2_yyin ) ) \
+           YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       result = n; \
+   } else if ( lrlimit == 0 ) { \
+       result = YY_NULL; \
+   } else { \
+       if ( lrlimit > 0 ) { \
+           bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+           lrlimit -= bytestoread; \
+       } else { \
+           bytestoread = max_size; \
+       } \
+       if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+               && ferror( tsearch2_yyin ) ) \
+           YY_FATAL_ERROR( "input in flex scanner failed" ); \
+   }
+
+%}
+
+/* scanner options: 8-bit input, non-interactive, no unput(), no yywrap() */
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing the parts of a hyphenated word */
+%x DELIM  
+/* parser's states for parsing a URL */
+%x URL  
+%x SERVER  
+
+/* parser's states for parsing tags, quoted text inside tags, comments and scripts */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* character classes; bytes \200-\377 count as cyrillic (koi8) letters */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+/* HOSTNAME: dot-separated labels ending in an alphabetic label; URI: run of URI characters */
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: release the saved whole-word copy (if any) and
+ * delete the current scan buffer.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Begin scanning `limit` bytes starting at `str`.  A buffer left over
+ * from a previous parse is released first.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL )
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Begin scanning from file handle `fh`.  `limit` is the number of bytes
+ * YY_INPUT may read; 0 means unlimited (stored as -1 in lrlimit).  A
+ * buffer left over from a previous parse is released first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL )
+       end_parse();
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+/* saved SPI plan for the pg_ts_parser lookup in init_prs(); prepared on first use */
+static void *plan_getparser=NULL;
+/* parser used by the *_current functions; InvalidOid until it has been chosen */
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser whose pg_ts_parser row
+ * has oid `id`.
+ *
+ * *prs is zeroed first.  The row is fetched through SPI; the start,
+ * nexttoken, end and headline functions are resolved into FmgrInfo
+ * structs allocated in TopMemoryContext, and the lextype function is
+ * kept as a plain Oid.  Reports an error through ts_error(ERROR, ...)
+ * when the plan cannot be prepared, the query fails, or no row matches.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   /* " " marks the single parameter as not null; at most one row is needed */
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else
+       /* Oid is unsigned, so print it with %u rather than %d */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/*
+ * Cache of the parsers that have already been looked up.
+ *   last_prs    - entry returned by the most recent findprs() call
+ *   len         - number of entries in use in list
+ *   reallen     - number of entries allocated for list
+ *   list        - entries kept sorted by prs_id (searched with bsearch)
+ *   name2id_map - parser name to id map used by name2id_prs()
+ */
+typedef struct {
+   WParserInfo *last_prs;
+   int     len;
+   int     reallen;
+   WParserInfo *list;
+   SNMap       name2id_map;
+} PrsList;
+
+/* the single parser cache, initially empty */
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole parser cache: free the name-to-id map and the entry
+ * array, then return PList to its empty state.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL )
+       free(PList.list);
+
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is
+ * unsigned, so the difference of two ids converted to int has the wrong
+ * sign once they are more than INT_MAX apart.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached description of parser `id`, loading it with
+ * init_prs() on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search of
+ * the sorted cache, and finally a new entry appended to the (possibly
+ * grown) array, which is then re-sorted.  Growing or re-sorting the
+ * array moves entries, so pointers returned by earlier calls must not be
+ * kept across calls.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo *slot;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       WParserInfo key;
+       key.prs_id=id;
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* last chance: make room for a new entry */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp )
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /*
+    * Do not point last_prs at the new slot yet.  init_prs() zeroes the
+    * slot and may then fail; if it does not return, the cache must not
+    * be left pointing at a half-initialised entry beyond PList.len.
+    */
+   PList.last_prs = NULL;
+   slot = &(PList.list[PList.len]);
+   init_prs(id, slot);
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return findprs(id); /* qsort changed order, so look the entry up again */
+}
+
+/* saved SPI plan for the name lookup in name2id_prs(); prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * Names already seen are answered from PList.name2id_map.  Otherwise the
+ * row is looked up through SPI and the result is added to the map.
+ * Reports an error through ts_error(ERROR, ...) when no parser has that
+ * name.
+ */
+Oid
+name2id_prs(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum argvalues[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid id;
+
+   id = findSNMap_t( &(PList.name2id_map), name );
+   if ( id != InvalidOid )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       void *plan = SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes );
+
+       plan_name2id = SPI_saveplan( plan );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, argvalues, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed == 0 )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/*
+ * Per-query state of the token_type* set-returning functions:
+ *   cur  - index of the next list entry to return
+ *   list - array produced by the parser's lextype function; the entry
+ *          with lexid 0 ends it
+ */
+typedef struct {
+   int     cur;
+   LexDescr    *list;
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type* functions.
+ *
+ * Looks up parser `prsid`, then, inside the multi-call memory context,
+ * calls its lextype function to get the token type list and prepares the
+ * slot and attribute metadata for rows of the "tokentype" relation.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo *parser = findprs(prsid);
+   MemoryContext saved;
+   TypeStorage *state;
+   TupleDesc rowdesc;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   state->cur = 0;
+   state->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) )
+   );
+   funcctx->user_fctx = (void*)state;
+
+   rowdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+/*
+ * Per-call step shared by the token_type* functions.
+ *
+ * Returns the next (id, alias, description) row and frees that entry's
+ * strings.  When the list is missing or its terminating entry (lexid 0)
+ * is reached, frees the state and returns (Datum)0.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *state = (TypeStorage*)funcctx->user_fctx;
+   LexDescr *entry;
+   char *columns[3];
+   char idbuf[16];
+   HeapTuple tuple;
+   Datum result;
+
+   /* nothing left: release the state and signal the end */
+   if ( state->list == NULL || state->list[state->cur].lexid == 0 ) {
+       if ( state->list != NULL )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+   sprintf(idbuf, "%d", entry->lexid);
+   columns[0] = idbuf;
+   columns[1] = entry->alias;
+   columns[2] = entry->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, columns);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(entry->alias);
+   pfree(entry->descr);
+   state->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: the token types of the parser whose oid is
+ * argument 0, one row per call.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: the token types of the parser whose name is
+ * argument 0, one row per call.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: the token types of the current parser, one
+ * row per call.  If no current parser has been chosen yet, the parser
+ * named "default" becomes the current one.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+/*
+ * Make the parser whose oid is argument 0 the current parser.  The
+ * parser is looked up first, so current_parser_id is only changed when
+ * findprs() returns.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid prsid = PG_GETARG_OID(0);
+
+   findprs(prsid);
+   current_parser_id = prsid;
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+/*
+ * Make the parser whose name is argument 0 the current parser by
+ * resolving the name and delegating to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+   text *name = PG_GETARG_TEXT_P(0);
+   Oid prsid = name2id_prs(name);
+
+   DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* one parsed token: its type number and a NUL-terminated copy of its text */
+typedef struct {
+   int type;
+   char    *lexem;
+} LexemEntry;
+
+/*
+ * Per-query state of the parse* set-returning functions:
+ *   cur  - index of the next entry to return
+ *   len  - allocated size while collecting, number of tokens afterwards
+ *   list - the collected tokens
+ */
+typedef struct {
+   int cur;
+   int len;
+   LexemEntry  *list;
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by parse(), parse_byname() and parse_current().
+ *
+ * Looks up parser `prsid` and runs it over the whole of `txt` inside the
+ * multi-call memory context.  Every token is copied (NUL-terminated)
+ * into a growing LexemEntry array until the getlexeme function returns
+ * 0; then the parser's end function is called.  Finally the slot and
+ * attribute metadata for rows of the "tokenout" relation are prepared.
+ *
+ * prsid is declared as Oid (not int) so that the parser id is not passed
+ * through a signed type on its way to findprs().
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid);
+   char    *lex=NULL;
+   int     llen=0, type=0;
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text payload (data pointer and byte length) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* collect tokens until the parser reports type 0 */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the token count and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call step shared by the parse* functions.
+ *
+ * Returns the next (type, lexeme) row and frees that lexeme.  Once all
+ * collected tokens have been returned, frees the state and returns
+ * (Datum)0.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage *state = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry *entry;
+   char *columns[2];
+   char typebuf[16];
+   HeapTuple tuple;
+   Datum result;
+
+   /* nothing left: release the state and signal the end */
+   if ( state->cur >= state->len ) {
+       if ( state->list != NULL )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+   sprintf(typebuf, "%d", entry->type);
+   columns[0] = typebuf;
+   columns[1] = entry->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, columns);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(entry->lexem);
+   state->cur++;
+   return result;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose oid is argument 0, returning one token per call.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose name is argument 0, returning one token per call.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+       text *txt = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 0 with the current
+ * parser, returning one token per call.  If no current parser has been
+ * chosen yet, the parser named "default" becomes the current one.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * Build a headline for a document.
+ *   arg 0 - oid of the configuration
+ *   arg 1 - document text
+ *   arg 2 - query
+ *   arg 3 - optional options text; it may be absent or passed as a NULL
+ *           pointer (the byname/current wrappers do the latter)
+ *
+ * The text is parsed with hlparsetext(), the headline function of the
+ * configuration's parser is then called on the result, and genhl()
+ * produces the returned text.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg = findcfg(PG_GETARG_OID(0));
+   text *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text *opt = NULL;
+   WParserInfo *prsobj;
+   HLPRSTEXT prs;
+   text *out;
+
+   if ( PG_NARGS()>3 && PG_GETARG_POINTER(3) )
+       opt = PG_GETARG_TEXT_P(3);
+   prsobj = findprs(cfg->prs_id);
+
+   memset(&prs, 0, sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt != NULL )
+       PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+/*
+ * headline() with the configuration given by name (argument 0).  The
+ * remaining arguments are passed through; a missing options argument is
+ * passed on as a NULL pointer.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg = PG_GETARG_TEXT_P(0);
+   Datum out;
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+/*
+ * headline() with the configuration returned by get_currcfg().
+ * Arguments 0 and 1 are the document and the query; a missing options
+ * argument (2) is passed on as a NULL pointer.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum out;
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
+   );
+   PG_RETURN_DATUM(out);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/*
+ * Cached description of one parser (one row of pg_ts_parser).
+ *   prs_id         - oid of the pg_ts_parser row
+ *   start_info     - resolved prs_start function
+ *   getlexeme_info - resolved prs_nexttoken function
+ *   end_info       - resolved prs_end function
+ *   headline_info  - resolved prs_headline function
+ *   lextype        - oid of the prs_lextype function
+ *   prs            - value returned by the start function, passed back
+ *                    to the getlexeme and end functions
+ */
+typedef struct {
+   Oid prs_id;
+   FmgrInfo start_info;
+   FmgrInfo getlexeme_info;
+   FmgrInfo end_info;
+   FmgrInfo headline_info;
+   Oid lextype;
+   void *prs;
+} WParserInfo;
+
+/* load the description of parser id into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached description of parser id, loading it if needed */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its id */
+Oid name2id_prs(text *name);
+/* drop the whole parser cache */
+void   reset_prs(void);
+
+
+/*
+ * One token type as reported by a parser's lextype function; an entry
+ * with lexid 0 ends the list.
+ */
+typedef struct {
+   int lexid;
+   char    *alias;
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+/*
+ * prsd_lextype: build the table of lexeme types of the default parser.
+ *
+ * Returns a palloc'd array of LASTNUM+1 LexDescr entries.  Entry k (0-based)
+ * describes lexeme type k+1, with alias and description copied from
+ * tok_alias[] and lex_descr[].  The final entry has lexid 0 and acts as the
+ * terminator; its alias and descr are left unset.
+ */
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *table=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   int slot;
+
+   for(slot=0;slot<LASTNUM;slot++) {
+       int lexid=slot+1;
+
+       table[slot].lexid = lexid;
+       table[slot].alias = pstrdup(tok_alias[lexid]);
+       table[slot].descr = pstrdup(lex_descr[lexid]);
+   }
+
+   /* terminating entry */
+   table[LASTNUM].lexid=0;
+
+   PG_RETURN_POINTER(table);
+}
+
+/*
+ * prsd_start: begin parsing a buffer with the default parser.
+ *
+ * Argument 0 is a pointer to the text buffer and argument 1 its length as
+ * an int32; both are handed to start_parse_str().  Always returns a NULL
+ * pointer.
+ */
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char *buf=(char*)PG_GETARG_POINTER(0);
+   int32 buflen=PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+   PG_RETURN_POINTER(NULL);
+}
+
+/*
+ * prsd_getlexeme: fetch the next lexeme from the default parser.
+ *
+ * Calls tsearch2_yylex() and then stores the globals `token` and `tokenlen`
+ * through the pointers passed as arguments 1 and 2.  Argument 0 is not
+ * read.  Returns the value produced by tsearch2_yylex() as an int32.
+ */
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tokout=(char**)PG_GETARG_POINTER(1);
+   int *lenout=(int*)PG_GETARG_POINTER(2);
+   int lextype;
+
+   /* run the lexer first: it is what updates token / tokenlen */
+   lextype=tsearch2_yylex();
+
+   *tokout = token;
+   *lenout = tokenlen;
+   PG_RETURN_INT32(lextype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end); /* default parser: finish a parse */
+Datum prsd_end(PG_FUNCTION_ARGS);
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse(); /* no argument is read; all work is delegated to end_parse() */
+   PG_RETURN_VOID();
+}
+
+#define LEAVETOKEN(x)  ( (x)==12 ) /* not referenced elsewhere in this file */
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 ) /* not referenced elsewhere in this file */
+#define ENDPUNCTOKEN(x)    ( (x)==12 ) /* not referenced elsewhere in this file */
+/*
+ * Predicates over a lexeme type id (prsd_headline() applies them to
+ * HLWORD.type).  NOTE(review): the numeric ids presumably follow the token
+ * numbering of wordparser/deflex.h -- confirm before changing any of them.
+ */
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 ) /* only used as part of NOENDTOKEN */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 ) /* words of these types get replace=1 in prsd_headline() */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) ) /* types not counted as words when measuring a headline */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) ) /* types a headline should not end on */
+
+/*
+ * Window of headline words handed to TS_execute() as its check value:
+ * `words` is the first word of the window and `len` the number of words.
+ */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * checkcondition_HL: TS_execute() callback.
+ * Returns true when some word inside the hlCheck window passed as checkval
+ * has its `item` pointer equal to val, false otherwise.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window=(hlCheck*)checkval;
+   int idx=0;
+
+   while ( idx<window->len ) {
+       if ( window->words[idx].item==val )
+           return true;
+       idx++;
+   }
+   return false;
+}
+
+
+/*
+ * hlCover: find the next "cover", i.e. a span of prs->words that satisfies
+ * the query.
+ *
+ *  prs   - headline text; words[i].item is compared with the query items
+ *  query - the query; only items of type VAL are looked at
+ *  p     - in: index of the first word to consider;
+ *          out: index of the first word of the cover
+ *  q     - out: index of the last word of the cover
+ *
+ * Returns true when words[*p..*q] makes TS_execute() succeed.  Returns false
+ * when no query item occurs at or after the start position, or when no
+ * candidate span is left.  *p and *q are only meaningful when true is
+ * returned.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   /*
+    * -1 means "no occurrence found yet".  0 must not be used as the
+    * sentinel: it is a valid word index, and a match on the very first
+    * word would then be mistaken for "nothing found".
+    */
+   *q=-1;
+   *p=0x7fffffff;
+
+   /* right edge: the furthest first-occurrence (from pos) of any operand */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q<0 )
+       return false;
+
+   /* left edge: scanning back from *q, the smallest nearest occurrence */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* span does not satisfy the query: retry one word further on */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+/*
+ * prsd_headline: choose the fragment of a parsed text to show as headline.
+ *
+ * Arguments:
+ *   0 - HLPRSTEXT*: the parsed text; modified in place
+ *   1 - text*: option string, may be NULL.  Keys (compared case-insensitively):
+ *       MaxWords (default 35), MinWords (default 15), ShortWord (default 3),
+ *       StartSel, StopSel
+ *   2 - QUERYTYPE*: the query
+ *
+ * When options are given, raises an error unless 0 < MinWords < MaxWords and
+ * ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is trimmed or extended towards the
+ * MinWords..MaxWords range, and the best one is kept.  If there is no cover
+ * at all, the fragment starts at word 0 and runs until MinWords words have
+ * been counted.  Words of the chosen fragment get in=1, plus selected=1 for
+ * words with a query item, skip=1 for repeated words, and replace=1 for
+ * HLIDIGNORE types.  startsel/stopsel default to "" and their lengths are
+ * stored.
+ *
+ * Returns the same HLPRSTEXT pointer.
+ */
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* from opt + start and end tag */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           /* values were converted or copied above; free the map entry */
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       /* bestlen starts at -1, so words[beste] is only read once beste is set */
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               /* word q was already counted by the loop above */
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this fragment if it is the first one, if it holds more query
+        * items and ends on an acceptable word, or if it ends on an
+        * acceptable word while the current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover found: take words from the start of the text */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   if (!prs->startsel)
+       prs->startsel=pstrdup("");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom}}}}}}}
+    "http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">[right
+    here].
+
+    Further in depth documentation such as a full function
+    reference, and user guide can be found online at the 
+    "http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/">[tsearch
+    documentation home].
+
+    ACKNOWLEDGEMENTS
+
+    Robert John Shepherd originally wrote this documentation for
+    the previous version of tsearch module (v1) included with the
+    postgres release. I took his documentation and updated it to
+    comply with the tsearch2 modifications.
+
+    Robert's original acknowledgements:
+
+    "Thanks to Oleg Bartunov for taking the time to answer many
+    of my questions regarding this module, and also to Teodor
+    Sigaev for clearing up the process of making your own
+    dictionaries. Plus of course a big thanks to the pair of them
+    for writing this module in the first place!"
+
+    I would also like to extend my thanks to the developers, and
+    Oleg Bartunov for all of his direction and help with the new
+    features of tsearch2.
+
+    OVERVIEW
+
+    MS-SQL provides a full text indexing (FTI) system which
+    enables the fast searching of text based fields, very useful
+    for websites (and other applications) that require a results
+    set based on key words. PostgreSQL ships with a contributed
+    module called tsearch2, which implements a special type of
+    index that can also be used for full text indexing. Further
+    more, unlike MS' offering which requires regular incremental
+    rebuilds of the text indexes themselves, tsearch2 indexes are
+    always up-to-date and keeping them so induces very little
+    overhead.
+
+    Before we get into the details, it is recommended that you
+    have installed and tested PostgreSQL, are reasonably familiar
+    with databases, the SQL query language and also understand the
+    basics of connecting to PostgreSQL from the local shell. This
+    document isn't intended for the complete PostgreSQL newbie, but
+    anyone with a reasonable grasp of the basics should be able to
+    follow it.
+
+    INSTALLATION
+
+    Starting with PostgreSQL version 7.4 tsearch2 is now
+    included in the contrib directory with the PostgreSQL sources.
+    contrib/tsearch2 is where you will find everything needed to
+    install and use tsearch2. Please note that tsearch2 will also
+    work with PostgreSQL version 7.3.x, but it is not the module
+    included with the source distribution. You will have to
+    download the module separately and install it in the same
+    fashion.
+
+    I installed the tsearch2 module to a PostgreSQL 7.3 database
+    from the contrib directory without squashing the original (old)
+    tsearch module. What I did was move the module's tsearch src
+    directory into the contrib tree under the name tsearchV2.
+
+    Step one is to download the tsearch V2 module :
+
+    
+    "http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">[http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/]
+    (check Development History for latest stable version !)
+    
+        tar -zxvf tsearch-v2.tar.gz
+        mv tsearch2 PGSQL_SRC/contrib/
+        cd PGSQL_SRC/contrib/tsearch2
+
+
+    If you are installing from PostgreSQL version 7.4 or higher,
+    you can skip those steps and just change to the
+    contrib/tsearch2 directory in the source tree and continue from
+    there.
+
+    Then continue with the regular building and installation
+    process
+    
+        gmake
+        gmake install
+        gmake installcheck
+
+
+    That is pretty much all you have to do, unless of course you
+    get errors. However if you get those, you better go check with
+    the mailing lists over at 
+    "http://www.postgresql.org">http://www.postgresql.org or
+    
+    "http://openfts.sourceforge.net/">http://openfts.sourceforge.net/
+    since it's never failed for me.
+
+    The directory in contrib/ and the directory from the
+    archive are both called tsearch2. Tsearch2 is completely incompatible
+    with the previous version of tsearch. This means that both
+    versions can be installed into a single database, and migration
+    to the new version may be much easier.
+
+    NOTE: the previous version of tsearch found in the
+    contrib/tsearch directory is deprecated. Although it is still
+    available and included within PostgreSQL version 7.4, it will
+    be removed in version 7.5.
+
+    ADDING TSEARCH2 FUNCTIONALITY TO A DATABASE
+
+    We should create a database to use as an example for the
+    remainder of this file. We can call the database "ftstest". You
+    can create it from the command line like this:
+    
+        #createdb ftstest
+
+
+    If you thought installation was easy, this next bit is even
+    easier. Change to the PGSQL_SRC/contrib/tsearch2 directory and
+    type:
+    
+        psql ftstest < tsearch2.sql
+
+
+    The file "tsearch2.sql" holds all the wonderful little
+    goodies you need to do full text indexing. It defines numerous
+    functions and operators, and creates the needed tables in the
+    database. There will be 4 new tables created after running the
+    tsearch2.sql file : pg_ts_dict, pg_ts_parser, pg_ts_cfg,
+    pg_ts_cfgmap are added.
+
+    You can check out the tables if you like:
+    
+        #psql ftstest
+        ftstest=# \d
+                    List of relations
+         Schema |     Name     | Type  |  Owner
+        --------+--------------+-------+----------
+         public | pg_ts_cfg    | table | kopciuch
+         public | pg_ts_cfgmap | table | kopciuch
+         public | pg_ts_dict   | table | kopciuch
+         public | pg_ts_parser | table | kopciuch
+        (4 rows)
+
+
+    TYPES AND FUNCTIONS PROVIDED BY TSEARCH2
+
+    The first thing we can do is try out some of the types that
+    are provided for us. Lets look at the tsvector type provided
+    for us:
+    
+        SELECT 'Our first string used today'::tsvector;
+                        tsvector
+        ---------------------------------------
+         'Our' 'used' 'first' 'today' 'string'
+        (1 row)
+
+
+    The results are the words used within our string. Notice
+    they are not in any particular order. The tsvector type returns
+    a string of space separated words.
+    
+        SELECT 'Our first string used again today first string'::tsvector;
+                            tsvector
+        -----------------------------------------------
+         'Our' 'used' 'again' 'first' 'today' 'string'
+        (1 row)
+
+
+    Notice the results string has each unique word ('first' and
+    'string' only appear once in the tsvector value). Which of
+    course makes sense if you are searching the full text ... you
+    only need to know each unique word in the text.
+
+    Those examples were just casting a text field to that of
+    type tsvector. Lets check out one of the new functions created
+    by the tsearch2 module.
+
+    The function to_tsvector has 3 possible signatures:
+    
+        to_tsvector(oid, text);
+        to_tsvector(text, text);
+        to_tsvector(text);
+
+
+    We will use the second method using two text fields. The
+    overloaded methods provide us with a way to specify the way
+    the searchable text is broken up into words (Stemming process).
+    Right now we will specify the 'default' configuration. See the
+    section on TSEARCH2 CONFIGURATION to learn more about this.
+    
+        SELECT to_tsvector('default',
+                           'Our first string used today first string');
+                        to_tsvector
+        --------------------------------------------
+         'use':4 'first':2,6 'today':5 'string':3,7
+        (1 row)
+
+
+    The result returned from this function is of type tsvector.
+    The results came about by this reasoning: All of the words in
+    the text passed in are stemmed, or not used because they are
+    stop words defined in our configuration. Each lower case
+    morphed word is returned with all of the positions in the
+    text.
+
+    In this case the word "Our" is a stop word in the default
+    configuration. That means it will not be included in the
+    result. The word "first" is found at positions 2 and 6
+    (although "Our" is a stop word, its position is maintained).
+    The word(s) positioning is maintained exactly as in the
+    original string. The word "used" is morphed to the word "use"
+    based on the default configuration for word stemming, and is
+    found at position 4. The rest of the results follow the same
+    logic. Just a reminder again ... the order of the 'word'
+    position in the output is not in any kind of order. (ie 'use':4
+    appears first)
+
+    If you want to view the output of the tsvector fields
+    without their positions, you can do so with the function
+    "strip(tsvector)".
+    
+        SELECT strip(to_tsvector('default',
+                     'Our first string used today first string'));
+                    strip
+        --------------------------------
+         'use' 'first' 'today' 'string'
+
+
+    If you wish to know the number of unique words returned in
+    the tsvector you can do so by using the function
+    "length(tsvector)"
+    
+        SELECT length(to_tsvector('default',
+                      'Our first string used today first string'));
+         length
+        --------
+              4
+        (1 row)
+
+
+    Let's take a look at the function to_tsquery. It also has 3
+    signatures which follow the same rationale as the to_tsvector
+    function:
+    
+        to_tsquery(oid, text);
+        to_tsquery(text, text);
+        to_tsquery(text);
+
+
+    Lets try using the function with a single word :
+    
+        SELECT to_tsquery('default', 'word');
+         to_tsquery
+        -----------
+         'word'
+         (1 row)
+
+
+    I call the function the same way I would a to_tsvector
+    function, specifying the 'default' configuration for morphing,
+    and the result is the stemmed output 'word'.
+
+    Lets attempt to use the function with a string of multiple
+    words:
+    
+        SELECT to_tsquery('default', 'this is many words');
+        ERROR:  Syntax error
+
+
+    The function can not accept a space separated string. The
+    intention of the to_tsquery function is to return a type of
+    "tsquery" used for searching a tsvector field. What we need to
+    do is search for one to many words with some kind of logic (for
+    now simple boolean).
+    
+        SELECT to_tsquery('default', 'searching|sentence');
+              to_tsquery
+        ----------------------
+         'search' | 'sentenc'
+        (1 row)
+
+
+    Notice that the words are separated by the boolean logic
+    "OR", the text could contain boolean operators &,|,!,()
+    with their usual meaning.
+
+    You can not use words defined as being a stop word in your
+    configuration. The function will not fail ... you will just get
+    no result, and a NOTICE like this:
+    
+        SELECT to_tsquery('default', 'a|is&not|!the');
+        NOTICE:  Query contains only stopword(s)
+                 or doesn't contain lexem(s), ignored
+         to_tsquery
+        -----------
+        (1 row)
+
+
+    That is a beginning to using the types, and functions
+    defined in the tsearch2 module. There are numerous more
+    functions that I have not touched on. You can read through the
+    tsearch2.sql file built when compiling to get more familiar
+    with what is included.
+
+    INDEXING FIELDS IN A TABLE
+
+    The next stage is to add a full text index to an existing
+    table. In this example we already have a table defined as
+    follows:
+    
+        CREATE TABLE tblMessages
+        (
+                intIndex        int4,
+                strTopic        varchar(100),
+                strMessage      text
+        );
+
+
+    We are assuming there are several rows with some kind of
+    data in them. Any data will do, just do several inserts with
+    test strings for a topic, and a message. Here is some test data
+    I inserted. (yes I know it's completely useless stuff ;-) but
+    it will serve our purpose right now).
+    
+        INSERT INTO tblMessages
+               VALUES ('1', 'Testing Topic', 'Testing message data input');
+        INSERT INTO tblMessages
+               VALUES ('2', 'Movie', 'Breakfast at Tiffany\'s');
+        INSERT INTO tblMessages
+               VALUES ('3', 'Famous Author', 'Stephen King');
+        INSERT INTO tblMessages
+               VALUES ('4', 'Political Topic',
+                            'Nelson Mandella is released from prison');
+        INSERT INTO tblMessages
+               VALUES ('5', 'Nursery rhyme phrase',
+                            'Little jack horner sat in a corner');
+        INSERT INTO tblMessages
+               VALUES ('6', 'Gettysburg address quotation',
+                            'Four score and seven years ago'
+                            ' our fathers brought forth on this'
+                            ' continent a new nation, conceived in'
+                            ' liberty and dedicated to the proposition'
+                            ' that all men are created equal');
+        INSERT INTO tblMessages
+               VALUES ('7', 'Classic Rock Bands',
+                            'Led Zeppelin Grateful Dead and The Sex Pistols');
+        INSERT INTO tblMessages
+               VALUES ('8', 'My birth address',
+                            '18 Sommervile road, Regina, Saskatchewan');
+        INSERT INTO tblMessages
+               VALUES ('9', 'Joke', 'knock knock : who\'s there?'
+                                    ' I will not finish this joke');
+        INSERT INTO tblMessages
+               VALUES ('10', 'Computer information',
+                             'My computer is a pentium III 400 mHz'
+                             ' with 192 megabytes of RAM');
+
+
+    The next stage is to create a special text index which we
+    will use for FTI, so we can search our table of messages for
+    words or a phrase. We do this using the SQL command:
+    
+        ALTER TABLE tblMessages ADD idxFTI tsvector;
+
+
+    Note that unlike traditional indexes, this is actually a new
+    field in the same table, which is then used (through the magic
+    of the tsearch2 operators and functions) by a special index we
+    will create in a moment.
+
+    The general rule for the initial insertion of data will
+    follow four steps:
+    
+    1. update table
+    2. vacuum full analyze
+    3. create index
+    4. vacuum full analyze
+
+
+    The data can be updated into the table, the vacuum full
+    analyze will reclaim unused space. The index can be created on
+    the table after the data has been inserted. Having the index
+    created prior to the update will slow down the process. It can
+    be done in that manner, this way is just more efficient. After
+    the index has been created on the table, vacuum full analyze is
+    run again to update postgres's statistics (ie having the index
+    take effect).
+    
+        UPDATE tblMessages SET idxFTI=to_tsvector('default', strMessage);
+        VACUUM FULL ANALYZE;
+
+
+    Note that this only inserts the field strMessage as a
+    tsvector, so if you want to also add strTopic to the
+    information stored, you should instead do the following, which
+    effectively concatenates the two fields into one before being
+    inserted into the table:
+    
+        UPDATE tblMessages
+            SET idxFTI=to_tsvector('default',coalesce(strTopic,'') ||' '|| coalesce(strMessage,''));
+        VACUUM FULL ANALYZE;
+
+
+    Using the coalesce function makes sure this
+    concatenation also works with NULL fields.
+
+    We need to create the index on the column idxFTI. Keep in
+    mind that the database will update the index when some action
+    is taken. In this case we _need_ the index (The whole point of
+    Full Text INDEXING ;-)), so don't worry about any indexing
+    overhead. We will create an index based on the gist function.
+    GiST is an index structure for Generalized Search Tree.
+    
+        CREATE INDEX idxFTI_idx ON tblMessages USING gist(idxFTI);
+        VACUUM FULL ANALYZE;
+
+
+    After you have converted all of your data and indexed the
+    column, you can select some rows to see what actually happened.
+    I will not display output here but you can play around
+    yourselves and see what happened.
+
+    The last thing to do is set up a trigger so every time a row
+    in this table is changed, the text index is automatically
+    updated. This is easily done using:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, strMessage);
+
+
+    Or if you are indexing both strMessage and strTopic you
+    should instead do:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE
+                tsearch2(idxFTI, strTopic, strMessage);
+
+
+    Before you ask, the tsearch2 function accepts multiple
+    fields as arguments so there is no need to concatenate the two
+    into one like we did before.
+
+    If you want to do something specific with columns, you may
+    write your very own trigger function using plpgsql or other
+    procedural languages (but not SQL, unfortunately) and use it
+    instead of tsearch2 trigger.
+
+    You could however call other stored procedures from within
+    the tsearch2 function. Lets say we want to create a function to
+    remove certain characters (like the @ symbol from all
+    text).
+    
+       CREATE FUNCTION dropatsymbol(text) 
+                     RETURNS text AS 'select replace($1, \'@\', \' \');' LANGUAGE SQL;
+
+
+    Now we can use this function within the tsearch2 function on
+    the trigger.
+    
+      DROP TRIGGER tsvectorupdate ON tblmessages;
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, dropatsymbol, strMessage);
+        INSERT INTO tblmessages VALUES (69, 'Attempt for dropatsymbol', 'test@test.com');
+
+
+    If at this point you receive an error stating: ERROR: Can't
+    find tsearch config by locale
+
+    Do not worry. You have done nothing wrong. And tsearch2 is
+    not broken. All that has happened here is that the
+    configuration is setup to use a configuration based on the
+    locale of the server. All you have to do is change your default
+    configuration, or add a new one for your specific locale. See
+    the section on TSEARCH2 CONFIGURATION.
+    
+   SELECT * FROM tblmessages WHERE intindex = 69;
+
+         intindex |         strtopic         |  strmessage   |        idxfti
+        ----------+--------------------------+---------------+-----------------------   
+                69 | Attempt for dropatsymbol | test@test.com | 'test':1 'test.com':2
+        (1 row)
+Notice that the string content was passed through the stored
+procedure dropatsymbol. The '@' character was replaced with a
+single space ... and the output from the procedure was then stored
+in the tsvector column.
+
+    This could be useful for removing other characters from
+    indexed text, or any kind of preprocessing needed to be done on
+    the text prior to insertion into the index.
+
+    QUERYING A TABLE
+
+    There are some examples in the README.tsearch2 file for
+    querying a table. One major difference between tsearch and
+    tsearch2 is the operator ## is no longer available. Only the
+    operator @@ is defined, using the types tsvector on one side
+    and tsquery on the other side.
+
+    Lets search the indexed data for the word "Test". I indexed
+    based on the concatenation of the strTopic, and the
+    strMessage:
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'test'::tsquery;
+         intindex |   strtopic
+        ----------+---------------
+                1 | Testing Topic
+        (1 row)
+
+
+    The only result that matched was the row with a topic
+    "Testing Topic". Notice that the word I search for was all
+    lowercase. Let's see what happens when I query for uppercase
+    "Test".
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'Test'::tsquery;
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    We get zero rows returned. The reason is because when the
+    text was inserted, it was morphed to my default configuration
+    (because of the call to to_tsvector in the UPDATE statement).
+    If there was no morphing done, and the tsvector field(s)
+    contained the word 'Test', a match would have been found.
+
+    Most likely the best way to query the field is to use the
+    to_tsquery function on the right hand side of the @@ operator
+    like this:
+    
+        SELECT intindex, strtopic FROM tblmessages
+               WHERE idxfti @@ to_tsquery('default', 'Test | Zeppelin');
+         intindex |      strtopic
+        ----------+--------------------
+                1 | Testing Topic
+                7 | Classic Rock Bands
+        (2 rows)
+
+
+    That query searched for all instances of "Test" OR
+    "Zeppelin". It returned two rows: the "Testing Topic" row, and
+    the "Classic Rock Bands" row. The to_tsquery function performed
+    the correct morphology upon the parameters, and searched the
+    tsvector field appropriately.
+
+    The last example here relates to searching for a phrase, for
+    example "minority report". This poses a problem with regard to
+    tsearch2, as it doesn't index phrases, only words. But there is
+    a way around which doesn't appear to have a significant impact
+    on query time, and that is to use a query such as the
+    following:
+    
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*men are created equal.*';
+         intindex |           strtopic
+        ----------+------------------------------
+                6 | Gettysburg address quotation
+        (1 row)
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*something that does not exist.*';
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    Of course if you're indexing both strTopic and strMessage, and
+    want to search for this phrase on both, then you will have to
+    get out the brackets and extend this query a little more.
+
+    TSEARCH2 CONFIGURATION
+
+    Some words such as "and", "the", and "who" are automatically
+    not indexed, since they belong to a pre-existing dictionary of
+    "Stop Words" which tsearch2 does not perform indexing on. If
+    someone needs to search for "The Who" in your database, they
+    are going to have a tough time coming up with any results,
+    since both are ignored in the indexes. But there is a
+    solution.
+
+    Lets say we want to add a word into the stop word list for
+    english stemming. We could edit the file
+    :'/usr/local/pgsql/share/english.stop' and add a word to the
+    list. I edited mine to exclude my name from indexing:
+    
+    - Edit /usr/local/pgsql/share/english.stop
+    - Add 'andy' to the list
+    - Save the file.
+
+
+    When you connect to the database, the dict_init procedure is
+    run during initialization. And in my configuration it will read
+    the stop words from the file I just edited. If you were
+    connected to the DB while editing the stop words, you will need
+    to end the current session and re-connect. When you re-connect
+    to the database, 'andy' is no longer indexed:
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+        (1 row)
+
+
+    Originally I would get the result :
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+         'andi':1
+        (1 row)
+
+
+    But since I added it as a stop word, it would be ignored on
+    the indexing. The stop word added was used in the dictionary
+    "en_stem". If I were to use a different configuration such as
+    'simple', the results would be different. There are no stop
+    words for the simple dictionary. It will just convert to lower
+    case, and index every unique word.
+    
+        SELECT to_tsvector('simple', 'Andy andy The the in out');
+                     to_tsvector
+        -------------------------------------
+         'in':5 'out':6 'the':3,4 'andy':1,2
+        (1 row)
+
+
+    All this talk about which configuration to use is leading us
+    into the actual configuration of tsearch2. In the examples in
+    this document the configuration has always been specified when
+    using the tsearch2 functions:
+    
+        SELECT to_tsvector('default', 'Testing the default config');
+        SELECT to_tsvector('simple', 'Example of simple Config');
+
+
+    The pg_ts_cfg table holds each configuration you can use
+    with the tsearch2 functions. As you can see, the ts_name column
+    contains both the 'default' configuration, based on the 'C'
+    locale, and the 'simple' configuration, which is not based on
+    any locale.
+    
+        SELECT * from pg_ts_cfg;
+             ts_name     | prs_name |    locale
+        -----------------+----------+--------------
+         default         | default  | C
+         default_russian | default  | ru_RU.KOI8-R
+         simple          | default  |
+        (3 rows)
+
+
+    Each row in the pg_ts_cfg table contains the name of the
+    tsearch2 configuration, the name of the parser to use, and the
+    locale mapped to the configuration. There is only one parser to
+    choose from the table pg_ts_parser called 'default'. More
+    parsers could be written, but for our needs we will use the
+    default.
+
+    There are 3 configurations installed by tsearch2 initially.
+    If your locale is set to 'en_US' for example (like my laptop),
+    then as you can see there is currently no dictionary configured
+    to use with that locale. You can either set up a new
+    configuration or just use one that already exists. If I do not
+    specify which configuration to use in the to_tsvector function,
+    I receive the following error.
+    
+        SELECT to_tsvector('learning tsearch is like going to school');
+        ERROR:  Can't find tsearch config by locale
+
+
+    We will create a new configuration for use with the
+    locale 'en_US'. The first step is to add a new configuration
+    into the pg_ts_cfg table. We will call the configuration
+    'default_english', with the default parser and use the locale
+    'en_US'.
+    
+        INSERT INTO pg_ts_cfg (ts_name, prs_name, locale)
+               VALUES ('default_english', 'default', 'en_US');
+
+
+    We have only declared that there is a configuration called
+    'default_english'. We need to set the configuration of how
+    'default_english' will work. The next step is creating a new
+    dictionary to use. The configuration of the dictionary is
+    completely different in tsearch2. In the prior versions to
+    make changes, you would have to re-compile your changes into
+    the tsearch.so. All of the configuration has now been moved
+    into the system tables created by executing the SQL code from
+    tsearch2.sql
+
+    Let's take a first look at the pg_ts_dict table
+    
+        ftstest=# \d pg_ts_dict
+                Table "public.pg_ts_dict"
+         Column      |  Type   | Modifiers
+        -----------------+---------+-----------
+         dict_name       | text    | not null
+         dict_init       | oid     |
+         dict_initoption | text    |
+         dict_lemmatize  | oid     | not null
+         dict_comment    | text    |
+        Indexes: pg_ts_dict_idx unique btree (dict_name)
+
+
+    The dict_name column is the name of the dictionary, for
+    example 'simple', 'en_stem' or 'ru_stem'. The dict_init column
+    is an OID of a stored procedure to run for initialization of
+    that dictionary, for example 'snb_en_init' or 'snb_ru_init'.
+    The dict_initoption column holds options passed to the init
+    function for the stored procedure. In the cases of 'en_stem' or
+    'ru_stem' it is a path to a stopword file for that dictionary,
+    for example '/usr/local/pgsql/share/english.stop'. This is
+    however dictated by the dictionary. ISpell dictionaries may
+    require different options. The dict_lemmatize column is another
+    OID of a stored procedure to the function used to lemmatize,
+    for example 'snb_lemmatize'. The dict_comment column is just a
+    comment.
+
+    Next we will configure the use of a new dictionary based on
+    ISpell. We will assume you have ISpell installed on your
+    machine. (in /usr/local/lib)
+
+    First let's register the dictionary(ies) to use from ISpell.
+    We will use the english dictionary from ISpell. We insert the
+    paths to the relevant ISpell dictionary (*.hash) and affixes
+    (*.aff) files. There seems to be some question as to which
+    ISpell files are to be used. I installed ISpell from the latest
+    sources on my computer. The installation installed the
+    dictionary files with an extension of *.hash. Some
+    installations install with an extension of *.dict. As far as I
+    know, the two extensions are equivalent. So *.hash ==
+    *.dict.
+
+    We will also continue to use the english word stop file that
+    was installed for the en_stem dictionary. You could use a
+    different one if you like. The ISpell configuration is based on
+    the "ispell_template" dictionary installed by default with
+    tsearch2. We will use the OIDs to the stored procedures from
+    the row where the dict_name = 'ispell_template'.
+    
+        INSERT INTO pg_ts_dict
+               (SELECT 'en_ispell',
+                       dict_init,
+                       'DictFile="/usr/local/lib/english.hash",'
+                       'AffFile="/usr/local/lib/english.aff",'
+                       'StopFile="/usr/local/pgsql/share/english.stop"',
+                       dict_lexize
+                FROM pg_ts_dict
+                WHERE dict_name = 'ispell_template');
+
+
+    Next we need to set up the configuration for mapping the
+    dictionary use to the lexeme types. This will be done by
+    altering the pg_ts_cfgmap table. We will insert several rows,
+    specifying the use of the new dictionary we installed and
+    configured for use within tsearch2. There are several types of
+    lexemes for which we want to force the use of the ISpell
+    dictionary.
+    
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lhword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lpart_hword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lword', '{en_ispell,en_stem}');
+
+
+    We have just inserted 3 records to the configuration
+    mapping, specifying that the lexeme types for "lhword,
+    lpart_hword and lword" are to be stemmed using the 'en_ispell'
+    dictionary we added into pg_ts_dict, when using the
+    configuration 'default_english' which we added to
+    pg_ts_cfg.
+
+    There are several other lexeme types used that we do not need
+    to specify as using the ISpell dictionary. We can simply insert
+    values using the 'simple' stemming process dictionary.
+    
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'url', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'host', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'sfloat', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uri', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'int', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'float', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'email', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'word', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlpart_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'part_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlhword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'file', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uint', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'version', '{simple}');
+
+
+    Our addition of a configuration for 'default_english' is now
+    complete. We have successfully created a new tsearch2
+    configuration. At the same time we have also set the new
+    configuration to be our default for en_US locale.
+    
+        SELECT to_tsvector('default_english',
+                           'learning tsearch is like going to school');
+                           to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        SELECT to_tsvector('learning tsearch is like going to school');
+                            to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        (1 row)
+
+
+    In the case that you already have a configuration set for
+    the locale, and you are changing it to your new dictionary
+    configuration, you will have to set the old locale to NULL. If
+    we are using the 'C' locale then we would do this:
+    
+        UPDATE pg_ts_cfg SET locale=NULL WHERE locale = 'C';
+
+
+    That about wraps up the configuration of tsearch2. There is
+    much more you can do with the tables provided. This was just an
+    introduction to get things working rather quickly.
+
+    ADDING NEW DICTIONARIES TO TSEARCH2
+
+    To aid in the addition of new dictionaries to the tsearch2
+    module you can use another additional module in combination
+    with tsearch2. The gendict module is included into tsearch2
+    distribution and is available from gendict/ subdirectory.
+
+    I will not go into detail about installation and
+    instructions on how to use gendict to its fullest extent right
+    now. You can read the README.gendict ... it has all of the
+    instructions and information you will need.
+
+    BACKING UP AND RESTORING DATABASES THAT FEATURE
+    TSEARCH2
+
+    Believe it or not, this isn't as straight forward as it
+    should be, and you will have problems trying to backup and
+    restore any database which uses tsearch2 unless you take the
+    steps shown below. And before you ask, using pg_dumpall will
+    result in failure every time. These took a lot of trial and
+    error to get working, but the process as laid down below has
+    been used a dozen times now in live production environments so
+    it should work fine.
+
+    HOWEVER never rely on anyone else's instructions to backup
+    and restore a database system, always develop and understand
+    your own methodology, and test it numerous times before you
+    need to do it for real.
+
+    To Backup a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Backup any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        pg_dumpall -g > GLOBALobjects.sql
+
+
+    2) Backup the full database schema using pg_dump
+    
+        pg_dump -s DATABASE > DATABASEschema.sql
+
+
+    3) Backup the full database using pg_dump
+    
+        pg_dump -Fc DATABASE > DATABASEdata.tar
+
+
+    To Restore a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Create the blank database
+    
+        createdb DATABASE
+
+
+    2) Restore any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        psql DATABASE < GLOBALobjects.sql
+
+
+    3) Create the tsearch2 objects, functions and operators
+    
+        psql DATABASE < tsearch2.sql
+
+
+    4) Edit the backed up database schema and delete all SQL
+    commands which create tsearch2 related functions, operators and
+    data types, BUT NOT fields in table definitions that specify
+    tsvector types. If you're not sure what these are, they are the
+    ones listed in tsearch2.sql. Then restore the edited schema to
+    the database
+    
+        psql DATABASE < DATABASEschema.sql
+
+
+    5) Restore the data for the database
+    
+        pg_restore -N -a -d DATABASE DATABASEdata.tar
+
+
+    If you get any errors in step 4, it will most likely be
+    because you forgot to remove an object that was created in
+    tsearch2.sql. Any errors in step 5 will mean the database
+    schema was probably restored wrongly.
+  
+
+


diff --git a/contrib/tsearch2/docs/tsearch2-guide.html b/contrib/tsearch2/docs/tsearch2-guide.html

new file mode 100644 (file)

index 0000000..2529480


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-guide.html
@@ -0,0 +1,1057 @@
+
+
+
+
+tsearch2 guide
+
+
+The tsearch2 Guide
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Guide introduces the reader to the PostgreSQL tsearch2 module,
+version 2.
+More formal descriptions of the module's types and functions
+are provided in the tsearch2 Reference,
+which is a companion to this document.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+First we will examine the tsvector and tsquery types
+and how they are used to search documents;
+next, we will use them to build a simple search engine in SQL;
+and finally, we will study the internals of document conversion
+and how you might tune the internals to accommodate various searching needs.
+
+Once you have tsearch2 working with PostgreSQL,
+you should be able to run the examples here exactly as they are typed.
+
+
+Table of Contents
+
+Vectors and Queries

+A Simple Search Engine

+Ranking and Position Weights

+Casting Vectors and Queries

+Parsing and Lexing

+
+
+
+
+Vectors and Queries
+
+
+This section introduces

+the two data types upon which tsearch2 search engines are based,
+and illustrates their interaction using the simplest possible case.
+The complex examples we present later on
+are merely variations and elaborations of this basic mechanism.
+
+
+The tsearch2 module allows you to index documents by the words they contain,
+and then perform very efficient searches
+for documents that contain a given combination of words.
+Preparing your document index involves two steps:
+
+Making a list of the words each document contains.
+ You must reduce each document to a tsvector
+ which lists each word that appears in the document.
+ This process offers many options,
+ because there is no requirement
+ that you must copy words into the vector
+ exactly as they appear in the document.
+ For example,
+ many developers omit frequent and content-free stop words
+ like the to reduce the size of their index;
+ others reduce different forms of the same word
+ (forked, forking, forks)
+ to a common form (fork)
+ to make search results independent of tense and case.
+ Because words are very often stored in a modified form,
+ we use the special term lexemes
+ for the word forms we actually store in the vector.
+Creating an index of the documents by lexeme.
+ This is managed automatically by tsearch2
+ when you create a gist() index
+ on the tsvector column of a table,
+ which implements a form of the Berkeley
+ Generalized Search Tree.
+
+Once your documents are indexed,
+performing a search involves:
+
+Reducing the search terms to lexemes.
+ You must express each search you want to perform
+ as a tsquery specifying a boolean combination of lexemes.
+ Note that tsearch2 only finds exact matches
+ between the lexemes in your query and the ones in each vector —
+ even capitalization counts as a difference
+ (which is why all lexemes are usually kept lowercase).
+ So you must process search words the same way you processed document words;
+ if forking became fork in the document's tsvector,
+ then the search term forking must also become fork
+ or the search will not find the document.
+Retrieving the documents that match the query.
+ Running a SELECT ... WHERE
+ query @@ vector
+ on the table with the vector column
+ will return the documents that match your query.
+Presenting your results.
+ This final stage offers as many options
+ as turning documents into vectors.
+ You can order documents by how well they matched the search terms;
+ create a headline for each document
+ showing some of the phrases in which it uses the search terms;
+ and restrict the number of results retrieved.
+ You will of course want some way to identify each document,
+ so the user can ask for the full text of the ones he wants to read.
+
+And beyond deciding upon rules for turning documents into vectors
+and for presenting search results to users,
+you have to decide where to perform these operations —
+whether one database server
+will parse documents, perform searches, and prepare search results,
+or whether to spread the load of these operations across several machines.
+These are complicated design issues
+which we will explore later;
+in this section and the next,
+we will illustrate what can be accomplished
+using a single database server.
+
+The default tsearch2 configuration,
+which we will learn more about later,
+provides a good example of a process for reducing documents to vectors:
+
+
+=# SELECT set_curcfg('default')
+=# SELECT to_tsvector('The air smells of sea water.')
+             to_tsvector             
+-------------------------------------
+ 'air':2 'sea':5 'smell':3 'water':6
+(1 row)
+
+
+Note the complex relationship between this document and its vector.
+The vector lists only words from the document —
+spaces and punctuation have disappeared.
+Common words like the and of have been eliminated.
+The -s that makes smells a plural has been removed,
+leaving a lexeme that represents the word in its simplest form.
+And finally,
+though the vector remembers the positions in which each word appeared,
+it does not store the lexemes in that order.
+
+Keeping word positions in your vectors is optional, by the way.
+The positions are necessary for the tsearch2 ranking functions,
+which you can use to prioritize documents
+based on how often each document uses the search terms
+and whether they appear in close proximity.
+But if you do not perform ranking,
+or use your own process that ignores the word positions stored in the vector,
+then you can save space by stripping them from your vectors:
+
+
+=# SELECT strip(to_tsvector('The air smells of sea water.'))
+            strip            
+-----------------------------
+ 'air' 'sea' 'smell' 'water'
+(1 row)
+
+
+Now that we have a procedure for creating vectors,
+we can build an indexed table of vectors very simply:
+
+
+=# CREATE TABLE vectors ( vector tsvector )
+=# CREATE INDEX vector_index ON vectors USING gist(vector)
+=# INSERT INTO vectors VALUES (to_tsvector('The path forks here'))
+=# INSERT INTO vectors VALUES (to_tsvector('A crawl leads west'))
+=# INSERT INTO vectors VALUES (to_tsvector('The left fork leads northeast'))
+=# SELECT * FROM vectors
+                  vector                  
+------------------------------------------
+ 'fork':3 'path':2
+ 'lead':3 'west':4 'crawl':2
+ 'fork':3 'lead':4 'left':2 'northeast':5
+(3 rows)
+
+
+Now we can search this collection of document vectors
+using the @@ operator and a tsquery
+that specifies the combination of lexemes we are looking for.
+Note that while vectors simply list lexemes,
+queries always combine them with the operators
+‘&’ and,
+‘|’ or,
+and  ‘!’ not,
+plus parentheses for grouping.
+Some examples of the query syntax:
+
+
+ ‘find documents with the word forks in them’

+ 'forks'
+
+ ‘... with both forks and leads’

+ 'forks & leads'
+
+ ‘... with either forks or leads’

+ 'forks | leads'
+
+ ‘... with either forks or leads,
+  but without crawl’

+ '(forks|leads) & !crawl'
+
+The tsearch2 module
+provides a to_tsquery() function for creating queries
+that uses the same process as to_tsvector() uses
+to reduce words to lexemes.
+For instance,
+it will remove the -s from the plurals in the last example above:
+
+
+=# SELECT to_tsquery('(leads|forks) & !crawl')
+           to_tsquery           
+--------------------------------
+ ( 'lead' | 'fork' ) & !'crawl'
+(1 row)
+
+
+Again,
+this is critically important because the search operator @@
+only finds exact matches
+between the words in a query and the words in a vector;
+if the document vector lists the lexeme fork
+but the query looks for the plural form forks,
+the query would not match that document.
+Thanks to the symmetry between our process
+for producing vectors and queries, however,
+the above searches return correct results:
+
+
+=# SELECT * FROM vectors WHERE vector @@ to_tsquery('(leads|forks) & !crawl')
+                  vector                  
+------------------------------------------
+ 'fork':3 'path':2
+ 'fork':3 'lead':4 'left':2 'northeast':5
+(2 rows)
+
+
+You may want to try the other queries shown above,
+and perhaps invent some of your own.
+
+You should not include stop words in a query,
+since you cannot search for words you have discarded.
+If you throw out the word the when building vectors, for example,
+your index will obviously not know which documents included it.
+The to_tsquery() function will automatically detect this
+and give you an error to prevent this mistake:
+
+
+=# SELECT to_tsquery('the')
+NOTICE:  Query contains only stopword(s) or doesn't contain lexem(s), ignored
+ to_tsquery 
+------------
+ 
+(1 row)
+
+
+But if you ever build vectors and queries using your own routines,
+a possibility we will discuss later,
+then you will need to enforce this rule yourself.
+
+

+Now that you understand how vectors and queries work together,
+you are prepared to tackle many additional topics:
+how to distribute searching across many servers;
+how to customize the process
+by which tsearch2 turns documents and queries into lexemes,
+or use a process of your own;
+and how to sort and display search results to your users.
+But before discussing these detailed questions,
+we will build a simple search engine
+to see how easily its basic features work together.
+
+
+A Simple Search Engine
+
+

+In this section we build a simple search engine out of SQL functions
+that use the vector and query types described in the previous section.
+While this example is simpler
+than a search engine that has to interface with the outside world,
+it will illustrate the basic principles of building a search engine,
+and better prepare you for developing your own.
+
+Building a search engine involves only a few improvements
+upon the rudimentary vector searches described in the last section.
+
+Because the user wants to read documents, not vectors,
+ you must provide some way
+ for the full text of each document to be accessed —
+ either by storing the entire text of each document in the database,
+ or storing an identifier
+ like a URL, file name, or document routing number
+ that lets you fetch the document from other storage.
+You can make it easier for user interface code to refer to each document
+ by providing a unique identifier for each document,
+ perhaps with a SERIAL column.
+Search results should be ordered by relevance.
+ If you leave word positions in your vectors,
+ you can either have PostgreSQL ORDER your results
+ BY a ranking function,
+ or you can fetch the vectors yourself and perform your own sort.
+ If you choose to ignore word positions or strip them from your vectors,
+ you will have to determine relevance yourself,
+ using either the full text of the document
+ or other information about each document you may possess.
+For each document returned by a search,
+ you will usually want to display a summary called a headline
+ that shows short excerpts
+ illustrating how the document uses the query words.
+ Headlines are usually generated from the full text of the document,
+ not from position information in the tsvector,
+ since excerpts lacking stop words, punctuation, and suffixes
+ would not be comprehensible.
+ If you store the full text of each document in the database,
+ headlines can be generated very simply by a tsearch2 function.
+ If you store your documents elsewhere,
+ then you will either have to transmit each document to the database
+ every time you want to run the headline function on it,
+ or use your own headline code outside of the database.
+
+
+We can easily construct a simple search engine
+that accomplishes these goals.
+First we build a table that, for each document,
+stores a unique identifier, the full text of the document,
+and its tsvector:
+
+
+=# CREATE TABLE docs ( id SERIAL, doc TEXT, vector tsvector )
+=# CREATE INDEX docs_index ON docs USING gist(vector);
+
+
+Note that although searches will still work
+on tables where you have neglected
+to create a gist() index over your vectors,
+they will run much more slowly
+since they will have to compare the query
+against every document vector in the table.
+
+Because the table we have created
+stores each document in two different ways —
+both as text and as a vector —
+our INSERT statements must provide the document in both forms.
+While more advanced PostgreSQL programmers
+might accomplish this with a database trigger or rule,
+for this simple example we will use a small SQL function:
+
+
+=# CREATE FUNCTION insdoc(text) RETURNS void LANGUAGE sql AS

+  'INSERT INTO docs (doc, vector) VALUES ($1, to_tsvector($1));'
+
+
+Now, by calling insdoc() several times,
+we can populate our table with documents:
+
+
+=# SELECT insdoc('A low crawl over cobbles leads inward to the west.')
+=# SELECT insdoc('The canyon runs into a mass of boulders -- dead end.')
+=# SELECT insdoc('You are crawling over cobbles in a low passage.')
+=# SELECT insdoc('Cavernous passages lead east, north, and south.')
+=# SELECT insdoc('To the east a low wide crawl slants up.')
+=# SELECT insdoc('You are in the south side chamber.')
+=# SELECT insdoc('The passage here is blocked by a recent cave-in.')
+=# SELECT insdoc('You are in a splendid chamber thirty feet high.')
+
+
+Now we can build a search function.
+Its SELECT statement is based upon
+the same @@ operation illustrated in the previous section.
+But instead of returning matching vectors,
+we return for each document
+its SERIAL identifier, so the user can retrieve it later;
+a headline that illustrates its use of the search terms;
+and a ranking with which we also order the results.
+Our search operation can be coded as a single SELECT statement
+returning its own kind of table row,
+which we call a finddoc_t:
+
+
+=# CREATE TYPE finddoc_t AS (id INTEGER, headline TEXT, rank REAL)
+=# CREATE FUNCTION finddoc(text) RETURNS SETOF finddoc_t LANGUAGE sql AS '

+   SELECT id, headline(doc, q), rank(vector, q)
+     FROM docs, to_tsquery($1) AS q
+     WHERE vector @@ q ORDER BY rank(vector, q) DESC'
+
+
+This function is a rather satisfactory search engine.
+Here is one example search,
+after which the user fetches the top-ranking document itself;
+with similar commands you can try queries of your own:
+
+
+=# SELECT * FROM finddoc('passage|crawl')
+ id |                       headline                        | rank 
+----+-------------------------------------------------------+------
+  3 | <b>crawling</b> over cobbles in a low <b>passage</b>. | 0.19
+  1 | <b>crawl</b> over cobbles leads inward to the west.   |  0.1
+  4 | <b>passages</b> lead east, north, and south.          |  0.1
+  5 | <b>crawl</b> slants up.                               |  0.1
+  7 | <b>passage</b> here is blocked by a recent  cave-in.  |  0.1
+(5 rows)
+=# SELECT doc FROM docs WHERE id = 3
+                       doc                       
+-------------------------------------------------
+ You are crawling over cobbles in a low passage.
+(1 row)
+
+
+While by default the headline() function
+surrounds matching words with <b> and </b>
+in order to distinguish them from the surrounding text,
+you can provide options that change its behavior;
+consult the tsearch2 Reference for more details about
+Headline Functions.
+
+Though a search may match hundreds or thousands of documents,
+you will usually present only ten or twenty results to the user at a time.
+This can be most easily accomplished
+by limiting your query with a LIMIT
+and an OFFSET clause —
+to display results ten at a time, for example,
+you would generate your first page of results
+with LIMIT 10 OFFSET 0,
+your second page
+with LIMIT 10 OFFSET 10,
+your third page
+with LIMIT 10 OFFSET 20,
+and so forth.
+There are two problems with this approach, however.
+
+The first problem is the strain of running the query over again
+for every page of results the user views.
+For small document collections or lightly loaded servers,
+this may not be a problem;
+but the impact can be high
+when a search must repeatedly rank and sort
+the same ten thousand results
+on an already busy server.
+So instead of selecting only one page of results,
+you will probably use LIMIT and OFFSET
+to return a few dozen or few hundred results,
+which you can cache and display to the user one page at a time.
+Whether a result cache rewards your effort
+will depend principally on the behavior of your users —
+how often they even view the second page of results, for instance.
+
+The second issue solved by caching involves consistency.
+If the database is changing while the user browses their results,
+then documents might appear and disappear as they page through them.
+In some cases the user might even miss a particular result —
+perhaps the one they were looking for —
+if, say, its rank improves from 31st to 30th
+after they load results 21–30 but before they view results 31–40.
+While many databases are static or infrequently updated,
+and will not present this problem,
+users searching very dynamic document collections
+might benefit from the stable results that caches yield.
+
+

+Having seen the features of a search engine
+implemented entirely within the database,
+we will learn about some specific tsearch2 features.
+First we will look in more detail at document ranking.
+
+
+Ranking and Position Weights
+
+

+When we built our simple search engine,
+we used the rank() function to order our results.

+Here we describe tsearch2 ranking in more detail.
+
+
+There are two functions with which tsearch2 can rank search results.
+They both use the lexeme positions listed in the tsvector,
+so you cannot rank vectors
+from which these have been removed with strip().
+The rank() function existed in older versions of OpenFTS,
+and has the feature that you can assign different weights
+to words from different sections of your document.
+The rank_cd() function uses a recent technique for weighting results
+but does not allow different weights to be given
+to different sections of your document.
+
+Both ranking functions allow you to specify,
+as an optional last argument,
+whether you want their results normalized —
+whether the rank returned should be adjusted for document length.
+Specifying a last argument of 0 (zero) makes no adjustment;
+1 (one) divides the document rank
+by the logarithm of the document length;
+and 2 divides it by the plain length.
+In all of these examples we omit this optional argument,
+which is the same as specifying zero —
+we are making no adjustment for document length.
+
+The rank_cd() function uses an experimental measurement
+called cover density ranking that rewards documents
+when they make frequent use of the search terms
+that are close together in the document.
+You can read about the algorithm in more detail
+in Clarke et al.,
+ “Relevance Ranking for One to Three Term Queries.”
+An optional first argument allows you to tune their formula;
+for details
+see the section on ranking
+in the Reference.
+
+The rank() function offers more flexibility
+because it pays attention to the weights
+with which you have labelled lexeme positions.
+Currently tsearch2 supports four different weight labels:
+'D', the default weight;
+and 'A', 'B', and 'C'.
+All vectors created with to_tsvector()
+assign the weight 'D' to each position,
+which as the default is not displayed when you print a vector out.
+
+If you want positions with weights other than 'D',
+you have two options:
+either you can author a vector directly through the ::tsvector
+casting operation,
+as described in the following section,
+which lets you give each position whichever weight you want;
+or you can pass a vector through the setweight() function
+which sets all of its position weights to a single value.
+An example of the latter:
+
+
+
+=# SELECT vector FROM docs WHERE id = 3
+                 vector                 
+----------------------------------------
+ 'low':8 'cobbl':5 'crawl':3 'passag':9
+(1 row)
+=# SELECT setweight(vector, 'A') FROM docs WHERE id = 3
+                 setweight                  
+--------------------------------------------
+ 'low':8A 'cobbl':5A 'crawl':3A 'passag':9A
+(1 row)
+
+
+
+Merely changing all of the weights in a vector is not very useful,
+of course,
+since this still results in all words having the same weight.
+But if we parse different parts of a document separately,
+giving each section its own weight,
+and then concatenate the vectors of each part into a single vector,
+the result can be very useful.
+We can construct a simple example
+in which document titles are given greater weight
+than text in the body of the document:
+
+
+
+=# CREATE TABLE tdocs ( id SERIAL, title TEXT, doc TEXT, vector tsvector )
+=# CREATE INDEX tdocs_index ON tdocs USING gist(vector);
+=# CREATE FUNCTION instdoc(text, text) RETURNS void LANGUAGE sql AS

+  'INSERT INTO tdocs (title, doc, vector)
+   VALUES ($1, $2, setweight(to_tsvector($1), ''A'') || to_tsvector($2));'
+
+
+
+Now words from a document title will be weighted differently
+than those in the main text
+if we provide the title and body as separate arguments:
+
+
+
+=# SELECT instdoc('Spendid Chamber',

+ 'The walls are frozen rivers of orange stone.')
+ instdoc 
+---------
+ 
+(1 row)
+=# SELECT vector FROM tdocs
+                                    vector                                    
+------------------------------------------------------------------------------
+ 'wall':4 'orang':9 'river':7 'stone':10 'frozen':6 'chamber':2A 'spendid':1A
+(1 row)
+
+
+
+Note that although the necessity is unusual,
+you can constrain search terms
+to only match words from certain sections
+by following them with a colon
+and a list of the sections in which the word can occur;
+by default this list is 'ABCD'
+so that search terms match words from all sections.
+For example,
+here we search for a word both generally,
+and then looking only for specific weights:
+
+
+
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:A')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:D')
+ title | doc 
+-------+-----
+(0 rows)
+
+
+
+
+
+

+Our examples so far use tsearch2 to parse our documents into vectors.
+When your application needs absolute control over vector content,
+you will want to use direct type casting,
+which is described in the next section.
+
+
+Casting Vectors and Queries
+
+

+While tsearch2 has powerful and flexible ways
+to process documents and turn them into document vectors,
+you will sometimes want to parse documents on your own
+and place the results directly in vectors.
+Here we show you how.
+
+
+In the preceding examples,
+we used the to_tsvector() function
+when we needed a document's text reduced to a document vector.
+We saw that the function stripped whitespace and punctuation,
+eliminated common words,
+and altered suffixes to reduce words to a common form.
+While these operations are often desirable,
+and while in the sections below
+we will gain precise control over this process,
+there are occasions on which
+you want to avoid the changes that to_tsvector() makes to text
+and specify explicitly the words that you want in your vectors.
+Or you may want to create queries directly
+rather than through to_tsquery().
+
+For example,
+you may have already developed your own routine
+for reducing your documents to searchable lexemes,
+and do not want your carefully generated terms altered
+by passing them through to_tsvector().
+Or you might be developing and debugging parsing routines of your own
+that you are not ready to load into the database.
+In either case,
+you will find that direct insertion is easily accomplished
+if you simply follow some simple rules.
+
+Vectors are created directly
+when you cast a string of whitespace separated lexemes
+to the tsvector type:
+
+
+
+=# select 'the only exit is the way you came in'::tsvector
+                     tsvector                     
+--------------------------------------------------
+ 'in' 'is' 'the' 'way' 'you' 'came' 'exit' 'only'
+(1 row)
+
+
+
+Notice that the conversion interpreted the string
+simply as a list of lexemes to be included in the vector.
+Their order was lost,
+as was the number of times each lexeme appeared.
+You must keep in mind that directly creating vectors with casting
+is not an alternate means of parsing;
+it is a way of directly entering lexemes into a vector without parsing.
+
+Queries can also be created through casting,
+if you separate lexemes with boolean operators
+rather than with whitespace.
+When creating your own vectors and queries,
+remember that the search operator @@
+finds only exact matches between query lexemes and vector lexemes
+—
+if they are not exactly the same string,
+they will not be considered a match.
+
+To include lexeme positions in your vector,
+write the positions exactly the way tsearch2 displays them
+when it prints vectors:
+by following each lexeme with a colon
+and a comma-separated list of integer positions.
+If you list a lexeme more than once,
+then all the positions listed for it are combined into a single list.
+For example,
+here are two ways of writing the same vector,
+depending on whether you mention ‘the’ twice
+or combine its positions into a list yourself:
+
+
+
+=# select 'the:1 only:2 exit:3 is:4 the:5 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+=# select 'the:1,5 only:2 exit:3 is:4 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+
+
+
+Things can get slightly tricky
+if you want to include apostrophes, backslashes, or spaces
+inside your lexemes
+(wanting to include either of the latter two would be unusual,
+but they can be included if you follow the rules).
+The main problem is that the apostrophe and backslash
+are important both to PostgreSQL when it is interpreting a string,
+and to the tsvector conversion function.
+You may want to review section
+1.1.2.1,
+“String Constants”
+in the PostgreSQL documentation before proceeding.
+
+When you cast strings directly into vectors:
+
+The string is interpreted as a whitespace-separated list of lexemes,
+ any of which can be suffixed with a colon and a list of positions.
+A lexeme can be quoted by preceding it with an apostrophe,
+ in which case it runs until the next apostrophe;
+ otherwise a lexeme ends with the first whitespace or colon encountered.
+Any character preceded by a backslash,
+ including whitespace, the apostrophe, the colon, and the backslash itself,
+ loses its normal meaning and is treated as a letter.
+ Backslashes are effective
+ both inside and outside of apostrophe-quoted lexemes.
+A lexeme can be suffixed with a list of positions
+ by appending a colon and a comma-separated list of integers,
+ each of which can itself be followed by a letter
+ to designate a position weight
+ (position weights are described in the preceding section on ranking).
+
+
+Here are some example strings,
+showing the lexeme you want to insert
+together with the string that the ::tsvector operator
+needs to see,
+and how you would type that string at the PostgreSQL prompt:
+
+
+
+For the lexeme...
+you need the string...
+which you can type as:
+
+nugget
+nugget
+'nugget'
+
+won't
+won't
+'won''t'
+
+pinin'
+pinin'
+'pinin'''
+
+'bout
+\'bout
+'\\''bout'
+
+white mist
+white\ mist
+'white\\ mist'
+
+or:
+'white mist'
+'''white mist'''
+
+won't budge
+won\'t\ budge
+'won\\''t\\ budge'
+
+or:
+'won\'t budge'
+'''won\\''t budge'''
+
+back\slashed
+back\\slashed
+'back\\\\slashed'
+
+
+Remember to use the quoted quoting shown at the right
+only when typing in strings as part of a PostgreSQL query.
+If you are providing strings through a library
+that automatically quotes them
+or provides them in binary form to PostgreSQL,
+then you can use the strings in the middle instead —
+suitably quoted in the language you are using, of course.
+
+Position weights are described in the preceding section on ranking
+and can be written exactly as they will be displayed
+when you select a weighted vector:
+
+
+=# select 'weighty:1,3A trivial:2B,4'::tsvector
+           tsvector            
+-------------------------------
+ 'trivial':2B,4 'weighty':1,3A
+(1 row)
+
+
+
+Note that if you are composing SQL queries
+in a scripting language like Perl or Python,
+that itself considers quotes and backslashes special,
+then you may have another quoting layer to deal with
+on top of the two layers already shown above.
+In such cases you may want to write a function
+that performs the necessary quoting for you.
+
+

+Having seen how to create vectors of your own,
+it is time to learn how the native tsearch2 parser
+reduces documents to vectors.
+
+
+Parsing and Lexing
+
+

+The previous section
+described how you can bypass the parser provided by tsearch2
+and populate your table of documents
+with vectors of your own devising.
+But for those interested in the native tsearch2 facilities,
+we present here an overview of how it goes about
+reducing documents to vectors.
+
+
+The to_tsvector() function reduces documents to vectors
+in two stages.
+First, a parser breaks the input document
+into short sequences of text called tokens.
+Each token is usually a word, space, or piece of punctuation,
+though some parsers return larger and more exotic items
+like HTML tags as single tokens.
+Each token returned by the parser
+is either discarded
+or passed to a dictionary that converts it into a lexeme.
+The resulting lexemes are collected into a vector and returned.
+
+The choice of which parser and dictionaries to_tsvector() should use
+is controlled by your choice of configuration.
+The tsearch2 module comes with several configurations,
+and you can define more of your own;
+in fact the creation of a new configuration is illustrated below,
+in the section on position weights.
+
+To learn about parsing in more detail,
+we will study this example:
+
+
+=# select to_tsvector('default',

+     'The walls extend upward for well over 100 feet.')
+                       to_tsvector                        
+----------------------------------------------------------
+ '100':8 'feet':9 'wall':2 'well':6 'extend':3 'upward':4
+(1 row)
+
+
+Unlike the to_tsvector() calls used in the above examples,
+this one specifies the 'default' configuration explicitly.
+When we called to_tsvector() in earlier examples
+with only one argument,
+it used the current configuration,
+which is chosen automatically based on your LOCALE
+if that locale is mentioned in the pg_ts_cfg table
+(which is shown under the first bullet in the description below).
+If your locale is not listed in the table,
+your attempts to use the current configuration will return:
+
+
+ERROR:  Can't find tsearch2 config by locale
+
+
+You can always change the current configuration manually
+by calling the set_curcfg() function
+described in the section on
+Configurations
+in the Reference.
+
+Each configuration serves as an index into two different tables:
+in pg_ts_cfg it determines
+which parser will break our text into tokens,
+and in pg_ts_cfgmap
+it directs each token to a dictionary for processing.
+The steps in detail are:
+
+
+
+First, our text is parsed,
+using the parser listed for our configuration in the pg_ts_cfg table.
+We are using the 'default' configuration,
+so the table tells us to use the 'default' parser:
+
+
+=# SELECT * FROM pg_ts_cfg WHERE ts_name = 'default'
+ ts_name | prs_name | locale 
+---------+----------+--------
+ default | default  | C
+(1 row)
+
+
+So our text will be parsed as though we had called:
+
+
+=# select * from parse('default',

+     'The walls extend upward for well over 100 feet.')
+
+
+This breaks the text into a list of tokens
+which are each labelled with an integer type:
+
+The₁♦₁₂
+walls₁♦₁₂
+extend₁♦₁₂
+upward₁♦₁₂
+for₁♦₁₂
+well₁♦₁₂
+over₁♦₁₂
+100₂₂♦₁₂
+feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')

+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and

+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+}

diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html

new file mode 100644 (file)

index 0000000..df0faa4


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-ref.html
@@ -0,0 +1,448 @@
+
+
+
+
+tsearch2 reference
+
+
+The tsearch2 Reference
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Reference documents the user types and functions
+of the tsearch2 module for PostgreSQL.
+An introduction to the module is provided
+by the tsearch2 Guide,
+a companion document to this one.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+Vectors and Queries
+
+Vectors and queries both store lexemes,
+but for different purposes.
+A tsvector stores the lexemes
+of the words that are parsed out of a document,
+and can also remember the position of each word.
+A tsquery specifies a boolean condition among lexemes.
+
+Any of the following functions with a configuration argument
+can use either an integer id or textual ts_name
+to select a configuration;
+if the option is omitted, then the current configuration is used.
+For more information on the current configuration,
+read the next section on Configurations.
+
+Vector Operations
+
+
+
+ to_tsvector( [configuration,]

+ document TEXT) RETURNS tsvector
+
+ Parses a document into tokens,
+ reduces the tokens to lexemes,
+ and returns a tsvector which lists the lexemes
+ together with their positions in the document.
+ For the best description of this process,
+ see the section on Parsing and Lexing
+ in the accompanying tsearch2 Guide.
+
+ strip(vector tsvector) RETURNS tsvector
+
+ Return a vector which lists the same lexemes
+ as the given vector,
+ but which lacks any information
+ about where in the document each lexeme appeared.
+ While the returned vector is thus useless for relevance ranking,
+ it will usually be much smaller.
+
+ setweight(vector tsvector, letter) RETURNS tsvector
+
+ This function returns a copy of the input vector
+ in which every location has been labelled
+ with either the letter
+ 'A', 'B', or 'C',
+ or the default label 'D'
+ (which is the default with which new vectors are created,
+ and as such is usually not displayed).
+ These labels are retained when vectors are concatenated,
+ allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+
+ vector1 || vector2
+
+ concat(vector1 tsvector, vector2 tsvector)

+ RETURNS tsvector
+
+ Returns a vector which combines the lexemes and position information
+ in the two vectors given as arguments.
+ Position weight labels (described in the previous paragraph)
+ are retained intact during the concatenation.
+ This has at least two uses.
+ First,
+ if some sections of your document
+ need to be parsed with different configurations than others,
+ you can parse them separately
+ and concatenate the resulting vectors into one.
+ Second,
+ you can weight words from some sections of your document
+ more heavily than those from others by:
+ parsing the sections into separate vectors;
+ assigning the vectors different position labels
+ with the setweight() function;
+ concatenating them into a single vector;
+ and then providing a weights argument
+ to the rank() function
+ that assigns different weights to positions with different labels.
+
+ tsvector_size(vector tsvector) RETURNS INT4
+
+ Returns the number of lexemes stored in the vector.
+
+ text::tsvector RETURNS tsvector
+
+ Directly casting text to a tsvector
+ allows you to directly inject lexemes into a vector,
+ with whatever positions and position weights you choose to specify.
+ The text should be formatted
+ like the vector would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Query Operations
+
+
+
+ to_tsquery( [configuration,]

+ querytext text) RETURNS tsquery
+
+ Parses a query,
+ which should be single words separated by the boolean operators
+ “&” and,
+ “|” or,
+ and “!” not,
+ which can be grouped using parentheses.
+ Each word is reduced to a lexeme using the current
+ or specified configuration.
+
+
+ querytree(query tsquery) RETURNS text
+
+ This might return a textual representation of the given query.
+
+ text::tsquery RETURNS tsquery
+
+ Directly casting text to a tsquery
+ allows you to directly inject lexemes into a query,
+ with whatever positions and position weight flags you choose to specify.
+ The text should be formatted
+ like the query would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Configurations
+
+A configuration specifies all of the equipment necessary
+to transform a document into a tsvector:
+the parser that breaks its text into tokens,
+and the dictionaries which then transform each token into a lexeme.
+Every call to to_tsvector() (described above)
+uses a configuration to perform its processing.
+Three configurations come with tsearch2:
+
+
+default — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the simple dictionary for all others.
+default_russian — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the ru_stem Russian Snowball dictionary for all others.
+simple — Processes both words and numbers
+ with the simple dictionary,
+ which neither discards any stop words nor alters them.
+
+
+The tsearch2 module initially chooses your current configuration
+by looking for your current locale in the locale field
+of the pg_ts_cfg table described below.
+You can manipulate the current configuration yourself with these functions:
+
+
+
+ set_curcfg( id INT | ts_name TEXT

+  ) RETURNS VOID
+
+ Set the current configuration used by to_tsvector
+ and to_tsquery.
+
+ show_curcfg() RETURNS INT4
+
+ Returns the integer id of the current configuration.
+
+
+
+Each configuration is defined by a record in the pg_ts_cfg table:
+
+create table pg_ts_cfg (
+   id      int not  null primary key,
+   ts_name     text not null,
+   prs_name    text not null,
+   locale      text
+);
+
+The id and ts_name are unique values
+which identify the configuration;
+the prs_name specifies which parser the configuration uses.
+Once this parser has split document text into tokens,
+the type of each resulting token —
+or, more specifically, the type's lex_alias
+as specified in the parser's lexem_type() table —
+is searched for together with the configuration's ts_name
+in the pg_ts_cfgmap table:
+
+create table pg_ts_cfgmap (
+   ts_name     text not null,
+   lex_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,lex_alias)
+);
+
+Those tokens whose types are not listed are discarded.
+The remaining tokens are assigned integer positions,
+starting with 1 for the first token in the document,
+and turned into lexemes with the help of the dictionaries
+whose names are given in the dict_name array for their type.
+These dictionaries are tried in order,
+stopping either with the first one to return a lexeme for the token,
+or discarding the token if no dictionary returns a lexeme for it.
+
+Parsers
+
+Each parser is defined by a record in the pg_ts_parser table:
+
+create table pg_ts_parser (
+   prs_id      int not null primary key,
+   prs_name    text not null,
+   prs_start   oid not null,
+   prs_getlexem    oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+);
+
+The prs_id and prs_name uniquely identify the parser,
+while prs_comment usually describes its name and version
+for the reference of users.
+The other items identify the low-level functions
+which make the parser operate,
+and are only of interest to someone writing a parser of their own.
+
+The tsearch2 module comes with one parser named default
+which is suitable for parsing most plain text and HTML documents.
+
+Each parser argument below
+must designate a parser with either an integer prs_id
+or a textual prs_name;
+the current parser is used when this argument is omitted.
+
+
+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID
+
+ Selects a current parser
+ which will be used when any of the following functions
+ are called without a parser as an argument.
+
+ CREATE FUNCTION lexem_type(

+  [ parser ]
+  ) RETURNS SETOF lexemtype
+
+ Returns a table which defines and describes
+ each kind of token the parser may produce as output.
+ For each token type the table gives the lexid
+ with which the parser will label each token of that type,
+ the alias which names the token type,
+ and a short description descr for the user to read.
+
+ CREATE FUNCTION parse(

+  [ parser, ] document TEXT
+  ) RETURNS SETOF lexemtype
+
+ Parses the given document and returns a series of records,
+ one for each token produced by parsing.
+ Each token includes a lexid giving its type
+ and a lexem which gives its content.
+
+
+Dictionaries
+
+Dictionaries take textual tokens as input,
+usually those produced by a parser,
+and return lexemes which are usually some reduced form of the token.
+Among the dictionaries which come installed with tsearch2 are:
+
+
+simple simply folds uppercase letters to lowercase
+ before returning the word.
+en_stem runs an English Snowball stemmer on each word
+ that attempts to reduce the various forms of a verb or noun
+ to a single recognizable form.
+ru_stem runs a Russian Snowball stemmer on each word.
+
+
+Each dictionary is defined by an entry in the pg_ts_dict table:
+
+CREATE TABLE pg_ts_dict (
+   dict_id     int not null primary key,
+   dict_name   text not null,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lemmatize  oid not null,
+   dict_comment    text
+);
+
+The dict_id and dict_name
+serve as unique identifiers for the dictionary.
+The meaning of the dict_initoption varies among dictionaries,
+but for the built-in Snowball dictionaries
+it specifies a file from which stop words should be read.
+The dict_comment is a human-readable description of the dictionary.
+The other fields are internal function identifiers
+useful only to developers trying to implement their own dictionaries.
+
+The argument named dictionary
+in each of the following functions
+should be either an integer dict_id or a textual dict_name
+identifying which dictionary should be used for the operation;
+if omitted then the current dictionary is used.
+
+
+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID
+
+ Selects a current dictionary for use by functions
+ that do not select a dictionary explicitly.
+
+ CREATE FUNCTION lexize(

+ [ dictionary, ] word text)
+ RETURNS TEXT[]
+
+ Reduces a single word to a lexeme.
+ Note that lexemes are arrays of zero or more strings,
+ since in some languages there might be several base words
+ from which an inflected form could arise.
+
+
+Ranking
+
+Ranking attempts to measure how relevant documents are to particular queries
+by inspecting the number of times each search word appears in the document,
+and whether different search terms occur near each other.
+Note that this information is only available in unstripped vectors —
+ranking functions will only return a useful result
+for a tsvector which still has position information!
+
+Both of these ranking functions
+take an integer normalization option
+that specifies whether a document's length should impact its rank.
+This is often desirable,
+since a hundred-word document with five instances of a search word
+is probably more relevant than a thousand-word document with five instances.
+The option can have the values:
+
+
+0 (the default) ignores document length.
+1 divides the rank by the logarithm of the length.
+2 divides the rank by the length itself.
+
+
+The two ranking functions currently available are:
+
+
+
+ CREATE FUNCTION rank(

+  [ weights float4[], ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS,
+ and offers the ability to weight word instances more heavily
+ depending on how you have classified them.
+ The weights specify how heavily to weight each category of word:
+ {D-weight, C-weight, B-weight, A-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(

+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or less.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+


diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b


--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | [email protected]
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | [email protected]
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 |  
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 |  
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 |  
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 |  
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+


diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496


--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+# Makefile template for a dictionary module produced by gendict.
+# NOTE(review): the upper-case tokens beginning with "CFG_" look like
+# placeholders that config.sh fills in when it generates the dictionary
+# directory -- confirm against the substitution step in config.sh.
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+# Module name, its object files, the SQL script built for it and its README.
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+# No extra preprocessor flags by default.
+PG_CPPFLAGS =
+# Link against the libtsearch2.a archive in the neighbouring tsearch2
+# directory (README.gendict lists compiled tsearch2 sources as a prerequisite).
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+
+# Must come after the variable definitions above.
+include $(top_srcdir)/contrib/contrib-global.mk


diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7


--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility helps people create dictionaries for the contrib/tsearch v2
+module. In particular, it has built-in support for Snowball stemmers.
+
+The programming API for tsearch2 dictionaries is described in the tsearch v2
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument for the -p option should be *the same* as the name
+      of the stemming function in stem.c (without _stem).
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample Portuguese words with their stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+        Note that the dictionary and the corresponding entry in the tsearch
+   configuration are now installed, and you may modify them using
+   plain SQL commands, for example to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercases the input word and removes it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please check the Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for additional information about "Gendict tutorial" and dictionaries.
\ No newline at end of file


diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542


--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+# Print the command-line synopsis for both generator modes (Snowball wrapper
+# and template dictionary) and terminate the script with exit status 1.
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+# Defaults for every command-line setting; the getopts loop below overrides them.
+dictname=
+prefix=
+comment=
+dir= 
+cfile=
+hfile=
+stemmode=no
+hasinit=no
+verbose=no
+
+# Walk the command line. Options taking a value store $OPTARG, flags switch
+# their setting to "yes", and anything getopts rejects ends in usage.
+while getopts n:c:C:h:d:p:vis opt
+do
+   case "$opt" in
+       n) dictname="$OPTARG";;
+       p) prefix="$OPTARG";;
+       C) comment="$OPTARG";;
+       d) dir="$OPTARG";;
+       c) cfile="$OPTARG";;
+       h) hfile="$OPTARG";;
+       s) stemmode=yes;;
+       i) hasinit=yes;;
+       v) verbose=yes;;
+       \?) usage;;
+   esac
+done
+
+# A dictionary name is mandatory.
+[ ${#dictname} -eq 0 ] && usage
+
+# The name is used in lower case everywhere below. It is quoted so that
+# whitespace or glob characters in it are not expanded by the shell.
+dictname=`echo "$dictname" | tr '[:upper:]' '[:lower:]'`
+
+# Snowball mode always has an init method and uses the fixed stem.c/stem.h
+# pair; the function prefix defaults to the dictionary name.
+if [ $stemmode = "yes" ] ; then 
+   [ ${#prefix} -eq 0 ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi 
+
+# Default target directory name.
+[ ${#dir}   -eq 0 ] && dir="dict_$dictname"
+
+# The comment is substituted into SQL: either the keyword null or a
+# single-quoted literal.
+if [ ${#comment} -eq 0 ]; then
+   comment=null
+else
+   comment="'$comment'"
+fi
+
+# Derive the object file list from the source file list. Only a trailing
+# ".c" extension is turned into ".o"; other names are left untouched
+# (previously any name merely ending in the letter "c" was rewritten).
+ofile=
+for f in $cfile
+do
+   f=`echo "$f" | sed 's#\.c$#.o#'`
+   ofile="$ofile $f"
+done
+
+# Add the object file of the generated wrapper/template itself.
+if [ $stemmode = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+# Verbose mode: report the effective configuration before doing any work.
+if [ $verbose = "yes" ]; then
+   echo Dictname: "'"$dictname"'"
+   echo Snowball stemmer: $stemmode
+   echo Has init method: $hasinit
+   if [ $stemmode = "yes" ]; then
+       echo Function prefix: $prefix 
+   fi
+   echo Source files: $cfile
+   echo Header files: $hfile
+   echo Object files: $ofile
+   echo Comment: $comment
+   echo Directory: ../../$dir
+fi
+
+
+# Create the target directory two levels up unless it already exists;
+# give up if it cannot be created.
+[ $verbose = "yes" ] && echo -n 'Build directory...  '
+if [ ! -d ../../$dir ] && ! mkdir ../../$dir ; then
+   echo "Can't create directory ../../$dir"
+   exit 1
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+# Generate the Makefile from Makefile.IN: first substitute the CFG_*
+# placeholders into a temporary file, then rewrite the PG_CPPFLAGS line
+# (Snowball wrappers additionally need the snowball include directory).
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   incflags="-I../tsearch2/snowball -I../tsearch2"
+else
+   incflags="-I../tsearch2"
+fi
+sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = $incflags#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+# Generate dict_<name>.sql.in from sql.IN.
+# Lines in sql.IN may start with a marker word. For the variant being
+# generated the marker itself is stripped (the line is kept); lines carrying
+# the opposite marker are blanked out.
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   # Keep HASINIT lines, blank NOINIT lines; the Snowball markers are handled
+   # in a second pass over a temporary file.
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       # Snowball wrapper: keep ISSNOWBALL lines, blank NOSNOWBALL lines.
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       # Template dictionary: keep NOSNOWBALL lines, blank ISSNOWBALL lines.
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   # No init method: keep NOINIT and NOSNOWBALL lines, blank HASINIT and
+   # ISSNOWBALL lines (Snowball mode forces hasinit=yes earlier in the script).
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+# Copy the listed source and header files into the target directory.
+# $cfile and $hfile are deliberately left unquoted: each may hold several
+# space-separated file names.
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+   [ $verbose = "yes" ] && echo -n 'Copy source and header files...  '
+   if [ ${#cfile} -ne 0 ] && ! cp $cfile ../../$dir ; then
+       echo "Cant cp all or one of files: $cfile"
+       exit 1
+   fi
+   if [ ${#hfile} -ne 0 ] && ! cp $hfile ../../$dir ; then
+       echo "Cant cp all or one of files: $hfile"
+       exit 1
+   fi
+   [ $verbose = "yes" ] && echo ok
+fi
+
+
+# Build subinclude.h: one #include line per header file given with -h.
+[ $verbose = "yes" ] && echo -n 'Build sub-include header...  '
+# Create/truncate the file with the null command. "echo -n" is not portable
+# for this: a /bin/sh whose echo does not implement -n would write the
+# literal text "-n" into the header.
+: > ../../$dir/subinclude.h
+for i in $hfile
+do
+   echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+# Generate the C source of the dictionary.
+if  [ $stemmode = "yes" ] ; then 
+   # Snowball wrapper: substitute the dictionary name and function prefix.
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   # Template dictionary: substitute the name, then keep the lines marked for
+   # the chosen variant (HASINIT or NOINIT) and blank the others.
+   [ $verbose = "yes" ] && echo -n 'Build dictionary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else 
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi 
+[ $verbose = "yes" ] && echo ok
+
+
+# Write a one-line README describing what was generated, then finish.
+[ $verbose = "yes" ] && echo -n "Build README.$dictname...  "
+if  [ $stemmode = "yes" ] ; then
+   readme_text="Autogenerated Snowball's wrapper for $prefix"
+else
+   readme_text="Autogenerated template for $dictname"
+fi
+echo "$readme_text" > ../../$dir/README.$dictname
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+


diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/* State allocated and filled in by dinit_CFG_MODNAME(). */
+typedef struct {
+   struct SN_env *z;           /* environment returned by CFG_PREFIX_create_env() */
+   StopList    stoplist;       /* stop-word list; loaded only when an argument is passed to init */
+   int (*stem)(struct SN_env * z); /* stemming routine, set to CFG_PREFIX_stem */
+} DictSnowball;
+
+
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * Init method of the generated Snowball dictionary.
+ *
+ * Allocates a zeroed DictSnowball with malloc(), optionally loads and sorts
+ * a stop-word list from argument 0 (when it is not NULL), creates the
+ * stemmer environment and returns the state as a pointer Datum.
+ * Reports "No memory" through elog(ERROR) when either allocation fails.
+ */
+Datum 
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+       
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       freestoplist(&(d->stoplist));
+       /* d came from malloc(), so it has to be released by hand before bailing out */
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+


diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+HASINIT /* State of the template dictionary: only a stop-word list. */
+HASINIT typedef struct {
+HASINIT    StopList    stoplist;
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT /*
+HASINIT  * Init method: allocate a zeroed DictExample with malloc() and, when
+HASINIT  * argument 0 is not NULL, load and sort the stop-word list from it.
+HASINIT  * The state is returned as a pointer Datum.
+HASINIT  */
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *dict;
+HASINIT 
+HASINIT    dict = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT    if ( dict == NULL )
+HASINIT        elog(ERROR, "No memory");
+HASINIT    memset(dict,0,sizeof(DictExample));
+HASINIT    dict->stoplist.wordop=lowerstr;
+HASINIT    
+HASINIT    /* Your INIT code */
+HASINIT    
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+HASINIT        text       *arg = PG_GETARG_TEXT_P(0);
+HASINIT 
+HASINIT        readstoplist(arg, &(dict->stoplist));
+HASINIT        sortstoplist(&(dict->stoplist));
+HASINIT        PG_FREE_IF_COPY(arg, 0);
+HASINIT    }
+HASINIT 
+HASINIT    PG_RETURN_POINTER(dict);
+HASINIT }
+
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+/*
+ * Lexize method of the template dictionary.
+ *
+ * Argument 0 is the dictionary state (read only in the HASINIT variant),
+ * argument 1 the input buffer and argument 2 an int32 handed to pnstrdup()
+ * together with the buffer (presumably the word length - confirm against
+ * pnstrdup()).
+ * Returns a palloc'd array of two char pointers; res[1] is always NULL.
+ * In the HASINIT variant res[0] is NULL when the word is empty or found in
+ * the stop list, otherwise res[0] is the copied word.  Without HASINIT the
+ * marker lines vanish and res[0] is always the copied word.
+ */
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+   char    **res=palloc(sizeof(char*)*2);
+
+   /* Your INIT dictionary code */
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0]=NULL;
+HASINIT    } else 
+       res[0]=txt;
+   res[1]=NULL;
+
+   PG_RETURN_POINTER(res);
+}


diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842


--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+-- Template of the SQL install script for a generated dictionary.
+-- config.sh substitutes the CFG_ placeholders and, depending on the chosen
+-- variant, strips or blanks the lines that begin with a marker word.
+SET search_path = public;
+BEGIN;
+
+-- init function (emitted only for dictionaries with an init method)
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+-- lexize function (emitted only for non-Snowball dictionaries)
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+-- register the dictionary; function oids are looked up by name in pg_proc
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;


diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+/* Version-1 call-convention declarations for every function defined below. */
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/*
+ * GETENTRY: treat the data area of varlena "vec" as an array of GISTENTRY
+ * and return the key of element "pos" as a GISTTYPE pointer.
+ */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/* SUMBIT: number of set bits in one byte (sum of its eight individual bits). */
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/*
+ * gtsvector_in: not supported; always reports "Not implemented" through
+ * elog(ERROR).
+ */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * gtsvector_out: not supported; always reports "Not implemented" through
+ * elog(ERROR).
+ */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * qsort() comparator for int4 values, ascending order.
+ */
+static int
+compareint(const void *a, const void *b)
+{
+   int4        lhs = *((const int4 *) a);
+   int4        rhs = *((const int4 *) b);
+
+   if (lhs > rhs)
+       return 1;
+   if (lhs < rhs)
+       return -1;
+   return 0;
+}
+
+/*
+ * Sort the l-element array a in place and squeeze out duplicate values.
+ *
+ * Returns the number of distinct values, which then occupy a[0..result-1].
+ * Arrays of zero or one element are returned unchanged; an empty array used
+ * to be reported as holding one element.
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+   int4       *ptr,
+              *res;
+
+   if (l <= 1)
+       return l;
+
+   ptr = res = a;
+
+   qsort((void *) a, l, sizeof(int4), compareint);
+
+   /* res marks the last distinct value kept so far, ptr scans the sorted input */
+   while (ptr - a < l)
+       if (*ptr != *res)
+           *(++res) = *ptr++;
+       else
+           ptr++;
+   return res + 1 - a;
+}
+
+/*
+ * Build a signature from an array key: clear sign, then set the bit
+ * selected by HASH() for every element of a's array.
+ */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+   int4       *values = GETARR(a);
+   int4        nvalues = ARRNELEM(a);
+   int4        idx;
+
+   MemSet((void *) sign, 0, sizeof(BITVEC));
+
+   for (idx = 0; idx < nvalues; idx++)
+       HASH(sign, values[idx]);
+}
+
+/*
+ * GiST compress method.
+ *
+ * Leaf keys (entry->leafkey set) hold a tsvector: every word is replaced by
+ * its crc32_sz() value, the values are sorted and de-duplicated, and the
+ * result is stored as an ARRKEY.  If that key is larger than
+ * TOAST_INDEX_TARGET it is replaced by a SIGNKEY signature.
+ * Non-leaf signature keys whose bytes are all 0xff are replaced by a compact
+ * SIGNKEY | ALLISTRUE key.  Every other entry is returned unchanged.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       /* array key with one slot per word of the tsvector */
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       /* sort the hash values and drop duplicates */
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* release the detoasted copy, if PG_DETOAST_DATUM made one */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /* any byte that is not 0xff: keep the entry as it is (early return) */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       /* every bit is set: store the header-only ALLISTRUE form instead */
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * GiST decompress method: detoast the key.  When detoasting produced a new
+ * copy, a fresh GISTENTRY pointing at that copy is returned; otherwise the
+ * incoming entry is handed back untouched.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+   GISTENTRY  *retval;
+
+   /* nothing was detoasted: the original entry is already usable */
+   if (key == (GISTTYPE *) DatumGetPointer(entry->key))
+       PG_RETURN_POINTER(entry);
+
+   retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+   gistentryinit(*retval, PointerGetDatum(key),
+                 entry->rel, entry->page,
+                 entry->offset, key->len, FALSE);
+
+   PG_RETURN_POINTER(retval);
+}
+
+/* Half-open range [arrb, arre) over the sorted int4 array of an array key. */
+typedef struct
+{
+   int4       *arrb;
+   int4       *arre;
+}  CHKVAL;
+
+/*
+ * Binary search: is val->val present in the sorted array described by
+ * checkval (a CHKVAL)?
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+   CHKVAL     *range = (CHKVAL *) checkval;
+   int4       *lo = range->arrb;
+   int4       *hi = range->arre;
+
+   /* the value, if present, always lies in [lo, hi) */
+   while (lo < hi)
+   {
+       int4       *mid = lo + (hi - lo) / 2;
+
+       if (*mid < val->val)
+           lo = mid + 1;
+       else if (*mid == val->val)
+           return (true);
+       else
+           hi = mid;
+   }
+
+   return (false);
+}
+
+/*
+ * Signature test: returns the bit of the signature passed as checkval that
+ * HASHVAL() selects for val->val.
+ */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+   return GETBIT(checkval, HASHVAL(val->val));
+}
+
+/*
+ * GiST consistent method.
+ *
+ * An empty query never matches.  An ALLISTRUE signature always matches.
+ * Other signature keys are evaluated with TS_execute() using the bit test,
+ * array keys using the binary-search test.
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(entry->key);
+   CHKVAL      chkval;
+
+   if (!query->size)
+       PG_RETURN_BOOL(false);
+
+   if (ISSIGNKEY(key))
+   {
+       if (ISALLTRUE(key))
+           PG_RETURN_BOOL(true);
+
+       PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                                 (void *) GETSIGN(key), false,
+                                 checkcondition_bit));
+   }
+
+   /* only leaf pages */
+   chkval.arrb = GETARR(key);
+   chkval.arre = chkval.arrb + ARRNELEM(key);
+   PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                             (void *) &chkval, true,
+                             checkcondition_arr));
+}
+
+/*
+ * OR the key "add" into the signature sbase.
+ *
+ * Returns 1, leaving sbase untouched, when add is an ALLISTRUE signature;
+ * returns 0 otherwise.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+   int4        i;
+   BITVECP     sadd;
+
+   if (!ISSIGNKEY(add))
+   {
+       /* array key: hash every element into the signature */
+       int4       *ptr = GETARR(add);
+
+       for (i = 0; i < ARRNELEM(add); i++)
+           HASH(sbase, ptr[i]);
+       return 0;
+   }
+
+   if (ISALLTRUE(add))
+       return 1;
+
+   sadd = GETSIGN(add);
+   LOOPBYTE(
+            sbase[i] |= sadd[i];
+   );
+   return 0;
+}
+
+
+/*
+ * GiST union method: combine all keys of entryvec into one signature key.
+ * The result is the header-only ALLISTRUE form as soon as one input key is
+ * ALLISTRUE.  The size of the result is also stored through argument 1.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   int        *size = (int *) PG_GETARG_POINTER(1);
+   int4        nentries = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+   int4        flag = SIGNKEY;
+   int4        keysize;
+   int4        idx;
+   BITVEC      base;
+   GISTTYPE   *result;
+
+   MemSet((void *) base, 0, sizeof(BITVEC));
+   for (idx = 0; idx < nentries; idx++)
+       if (unionkey(base, GETENTRY(entryvec, idx)))
+       {
+           /* an all-true member makes the whole union all-true */
+           flag |= ALLISTRUE;
+           break;
+       }
+
+   keysize = CALCGTSIZE(flag, 0);
+   result = (GISTTYPE *) palloc(keysize);
+   result->len = keysize;
+   *size = result->len;
+   result->flag = flag;
+   if (!ISALLTRUE(result))
+       memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * GiST same method: store in *result (argument 2) whether keys a and b are
+ * equal.  Signature keys are equal when both are ALLISTRUE or all their
+ * bytes match; array keys when length and all elements match.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+   GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+   GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+   bool       *result = (bool *) PG_GETARG_POINTER(2);
+   int4        i;
+
+   if (ISSIGNKEY(a))
+   {                           /* then b also ISSIGNKEY */
+       if (ISALLTRUE(a) || ISALLTRUE(b))
+       {
+           /* equal only when both are all-true */
+           *result = (ISALLTRUE(a) && ISALLTRUE(b)) ? true : false;
+       }
+       else
+       {
+           BITVECP     sa = GETSIGN(a);
+           BITVECP     sb = GETSIGN(b);
+
+           *result = true;
+           LOOPBYTE(
+                    if (sa[i] != sb[i])
+                    {
+                        *result = false;
+                        break;
+                    }
+           );
+       }
+   }
+   else
+   {                           /* a and b ISARRKEY */
+       int4        lena = ARRNELEM(a);
+       int4        lenb = ARRNELEM(b);
+
+       *result = (lena == lenb) ? true : false;
+       if (*result)
+       {
+           int4       *ptra = GETARR(a);
+           int4       *ptrb = GETARR(b);
+
+           for (i = 0; i < lena; i++)
+               if (ptra[i] != ptrb[i])
+               {
+                   *result = false;
+                   break;
+               }
+       }
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * Count the bits set in a signature.
+ */
+static int4
+sizebitvec(BITVECP sign)
+{
+   int4        nbits = 0;
+   int4        i;
+
+   LOOPBYTE(
+       nbits += SUMBIT(sign[i]);
+   );
+   return nbits;
+}
+
+/*
+ * Number of bit positions in which signatures a and b differ.
+ */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+   int     i;
+   int     ndiff = 0;
+
+   LOOPBIT(
+       ndiff += ( GETBIT(a,i) != GETBIT(b,i) ) ? 1 : 0;
+   );
+   return ndiff;
+}
+
+/*
+ * Distance between two signature keys.  An ALLISTRUE key is treated as a
+ * signature with every bit set: the distance to it is the number of bits
+ * that are clear in the other key.
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+   if ( ISALLTRUE(a) )
+       return ISALLTRUE(b) ? 0 : SIGLENBIT-sizebitvec(GETSIGN(b));
+
+   if ( ISALLTRUE(b) )
+       return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+   return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * GiST penalty method: store in *penalty (argument 2) the cost of adding
+ * newentry below origentry.  An array key is first converted to a
+ * signature; the cost is then derived from the bit distance between the
+ * two signatures.
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+   GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+   float      *penalty = (float *) PG_GETARG_POINTER(2);
+   GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+   GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+   BITVEC      newsign;
+
+   *penalty = 0.0;
+
+   /* both keys are signatures: plain key distance */
+   if (!ISARRKEY(newval))
+   {
+       *penalty=hemdist(origval,newval);
+       PG_RETURN_POINTER(penalty);
+   }
+
+   makesign(newsign, newval);
+   if ( ISALLTRUE(origval) )
+       *penalty=((float)(SIGLENBIT-sizebitvec(newsign)))/(float)(SIGLENBIT+1);
+   else
+       *penalty=hemdistsign(newsign,GETSIGN(origval));
+
+   PG_RETURN_POINTER(penalty);
+}
+
+/* Signature form of one key, as cached by gtsvector_picksplit(). */
+typedef struct
+{
+   bool        allistrue;
+   BITVEC      sign;
+}  CACHESIGN;
+
+/*
+ * Fill a cache slot from a key: array keys are hashed into a signature,
+ * ALLISTRUE keys only set the flag (sign is left untouched), other
+ * signature keys are copied.
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+   if (ISARRKEY(key))
+   {
+       item->allistrue = false;
+       makesign(item->sign, key);
+   }
+   else if (ISALLTRUE(key))
+   {
+       item->allistrue = true;
+   }
+   else
+   {
+       item->allistrue = false;
+       memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+   }
+}
+
+/* WISH_F: -(a-b)^3 * c, used by gtsvector_picksplit() to bias the choice of side. */
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+/* An entry position together with its sort key for qsort(). */
+typedef struct
+{
+   OffsetNumber pos;
+   int4        cost;
+} SPLITCOST;
+
+/*
+ * qsort() comparator for SPLITCOST: ascending by cost.
+ */
+static int
+comparecost(const void *a, const void *b)
+{
+   int4        lcost = ((const SPLITCOST *) a)->cost;
+   int4        rcost = ((const SPLITCOST *) b)->cost;
+
+   if (lcost > rcost)
+       return 1;
+   if (lcost < rcost)
+       return -1;
+   return 0;
+}
+
+
+/*
+ * Distance between two cached signatures; an all-true entry counts as a
+ * signature with every bit set.
+ */
+static int
+hemdistcache(CACHESIGN   *a, CACHESIGN   *b) {
+   if ( a->allistrue )
+       return b->allistrue ? 0 : SIGLENBIT-sizebitvec(b->sign);
+
+   if ( b->allistrue )
+       return SIGLENBIT-sizebitvec(a->sign);
+
+   return hemdistsign( a->sign, b->sign );
+}
+
+/*
+ * GiST picksplit method.
+ *
+ * Argument 0 is the vector of entries to split, argument 1 the
+ * GIST_SPLITVEC that receives the result (it is also the return value).
+ *
+ * 1. Every key is converted to a cached signature (CACHESIGN).
+ * 2. The pair of entries with the largest hemdistcache() distance becomes
+ *    the two seeds; each seed initialises the union key of one side.
+ * 3. All entries are sorted by |dist(seed_1) - dist(seed_2)| (ascending)
+ *    and then assigned one by one to the side whose current union key is
+ *    closer, with WISH_F() adding a bias that depends on how many entries
+ *    each side already has.
+ *
+ * Fix: the distance to a cached signature must use cache[j].sign directly.
+ * GETSIGN() adds the GISTTYPE header size and is only valid on GISTTYPE
+ * pointers; applied to the bare BITVEC it pointed past the signature start.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+   OffsetNumber k,
+               j;
+   GISTTYPE   *datum_l,
+              *datum_r;
+   BITVECP     union_l,
+               union_r;
+   int4        size_alpha,
+               size_beta;
+   int4        size_waste,
+               waste = -1;
+   int4        nbytes;
+   OffsetNumber seed_1 = 0,
+               seed_2 = 0;
+   OffsetNumber *left,
+              *right;
+   OffsetNumber maxoff;
+   BITVECP     ptr;
+   int         i;
+   CACHESIGN  *cache;
+   SPLITCOST  *costvector;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+   cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+   fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+   /* pick the two most distant entries as seeds; the cache is filled during the first pass */
+   for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+       for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+           if (k == FirstOffsetNumber)
+               fillcache(&cache[j], GETENTRY(entryvec, j));
+
+           size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+           if (size_waste > waste) {
+               waste = size_waste;
+               seed_1 = k;
+               seed_2 = j;
+           }
+       }
+   }
+
+   left = v->spl_left;
+   v->spl_nleft = 0;
+   right = v->spl_right;
+   v->spl_nright = 0;
+
+   /* no pair was examined: fall back to the first two entries */
+   if (seed_1 == 0 || seed_2 == 0) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
+   /* form initial .. */
+   if (cache[seed_1].allistrue) {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_l->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_l->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+   }
+   if (cache[seed_2].allistrue) {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_r->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_r->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+   }
+
+   union_l=GETSIGN(datum_l);
+   union_r=GETSIGN(datum_r);
+   /* from here on the entry one past the previous maxoff takes part as well */
+   maxoff = OffsetNumberNext(maxoff);
+   fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+   /* sort before ... */
+   costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+   for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+       costvector[j - 1].pos = j;
+       size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+       size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+       costvector[j - 1].cost = abs(size_alpha - size_beta);
+   }
+   qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+   for (k = 0; k < maxoff; k++) {
+       j = costvector[k].pos;
+       /* the seeds go to their own side without touching the union keys */
+       if (j == seed_1) {
+           *left++ = j;
+           v->spl_nleft++;
+           continue;
+       } else if (j == seed_2) {
+           *right++ = j;
+           v->spl_nright++;
+           continue;
+       }
+
+       /* distance of entry j to the current left union key */
+       if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+               size_alpha=0;
+           else
+               size_alpha = SIGLENBIT-sizebitvec(  
+                   ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign  
+               );
+       } else {
+           size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+       }
+
+       /* distance of entry j to the current right union key */
+       if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+               size_beta=0;
+           else
+               size_beta = SIGLENBIT-sizebitvec(  
+                   ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign  
+               );
+       } else {
+           size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+       }
+
+       if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+           /* add entry j to the left side and widen its union key */
+           if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_l) )
+                   MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_l[i] |= ptr[i];
+               );
+           }
+           *left++ = j;
+           v->spl_nleft++;
+       } else {
+           /* add entry j to the right side and widen its union key */
+           if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_r) )
+                   MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_r[i] |= ptr[i];
+               );
+           }
+           *right++ = j;
+           v->spl_nright++;
+       }
+   }
+
+   /* both offset lists get one extra trailing element */
+   *right = *left = FirstOffsetNumber;
+   pfree(costvector);
+   pfree(cache);
+   v->spl_ldatum = PointerGetDatum(datum_l);
+   v->spl_rdatum = PointerGetDatum(datum_r);
+
+   PG_RETURN_POINTER(v);
+}


diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+#define BITBYTE 8
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+/* a signature is a fixed-size bit string of SIGLENBIT bits */
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+/*
+ * Run statement(s) "a" once per signature byte (LOOPBYTE) or per signature
+ * bit (LOOPBIT).  Both use a loop variable named "i" that the caller must
+ * declare.
+ */
+#define LOOPBYTE(a) \
+       for(i=0;i<SIGLEN;i++) {\
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i<SIGLENBIT;i++) {\
+                               a;\
+               }
+
+/* bit access: x is a signature, i a bit number (GETBITBYTE: x is a single byte) */
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 )
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+/* map a value to a bit position of the signature and set that bit */
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ */
+typedef struct
+{
+   int4        len;            /* total size of the key, header included */
+   int4        flag;           /* combination of ARRKEY / SIGNKEY / ALLISTRUE */
+   char        data[1];        /* int4 array or signature, see GETARR/GETSIGN */
+}  GISTTYPE;
+
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+#define ISARRKEY(x) ( ((GISTTYPE*)(x))->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)(x))->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)(x))->flag & ALLISTRUE )
+
+/* key size: header plus len int4 values (array), nothing (all-true) or SIGLEN bytes */
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+/* payload accessors; x must point at a GISTTYPE */
+#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)(x)+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)(x))->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "spell.h"
+
+#define MAXNORMLEN 56
+
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/*
+ * qsort() comparator: orders SPELL entries by their word, in strcmp() order.
+ */
+static int
+cmpspell(const void *s1, const void *s2)
+{
+   const SPELL *left = (const SPELL *) s1;
+   const SPELL *right = (const SPELL *) s2;
+
+   return strcmp(left->word, right->word);
+}
+
+/*
+ * Convert the NUL-terminated string 'str' to lower case in place,
+ * one byte at a time, using tolower().
+ */
+static void
+strlower(char *str)
+{
+   unsigned char *cursor;
+
+   for (cursor = (unsigned char *) str; *cursor; cursor++)
+       *cursor = tolower(*cursor);
+}
+
+/* backward string comparison, used for suffix tree operations */
+static int
+strbcmp(const char *s1, const char *s2)
+{
+   int         i1 = strlen(s1) - 1;
+   int         i2 = strlen(s2) - 1;
+
+   /* walk both strings from the last character towards the first */
+   for (; i1 >= 0 && i2 >= 0; i1--, i2--)
+   {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   /* one string is a tail of the other: the shorter one sorts first */
+   if (i1 != i2)
+       return (i1 < i2) ? -1 : 1;
+
+   return 0;
+}
+/*
+ * Backward comparison of at most 'count' trailing characters of s1 and s2.
+ * Returns 0 when the last 'count' characters are equal.
+ */
+static int
+strbncmp(const char *s1, const char *s2, size_t count)
+{
+   int         i1 = strlen(s1) - 1;
+   int         i2 = strlen(s2) - 1;
+   int         remaining = count;
+
+   for (; i1 >= 0 && i2 >= 0 && remaining > 0; i1--, i2--, remaining--)
+   {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   /* all requested characters matched */
+   if (remaining == 0)
+       return 0;
+
+   /* a string ran out first: the shorter one sorts first */
+   if (i1 != i2)
+       return (i1 < i2) ? -1 : 1;
+   return 0;
+}
+
+/*
+ * qsort() comparator for AFFIX entries: first by type, then by the
+ * replacement string -- forward (strcmp) for type 'p', backward (strbcmp)
+ * for every other type.
+ */
+static int
+cmpaffix(const void *s1, const void *s2)
+{
+   const AFFIX *a = (const AFFIX *) s1;
+   const AFFIX *b = (const AFFIX *) s2;
+
+   if (a->type != b->type)
+       return (a->type < b->type) ? -1 : 1;
+
+   return (a->type == 'p') ? strcmp(a->repl, b->repl)
+                           : strbcmp(a->repl, b->repl);
+}
+
+/*
+ * Append 'word' with its affix flags 'flag' to the dictionary word list.
+ *
+ * The list grows in steps of 20*1024 entries.  The word is copied with
+ * strdup(); the flag string is copied into the fixed-size SPELL.flag
+ * buffer and truncated if it does not fit.  Reports elog(ERROR) when
+ * memory cannot be allocated.  Always returns 0.
+ */
+int 
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   SPELL *entry;
+
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell"); 
+   }
+   entry = &Conf->Spell[Conf->nspell];
+   entry->word=strdup(word);
+   if ( !entry->word ) 
+       elog(ERROR,"No memory for AddSpell");
+   /*
+    * strncpy() does not terminate the destination when the source fills it
+    * completely, and FindWord() runs strchr() over this buffer: always
+    * leave room for, and write, the terminating NUL.
+    */
+   strncpy(entry->flag,flag,sizeof(entry->flag)-1);
+   entry->flag[sizeof(entry->flag)-1]='\0';
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * Read a dictionary file with one "word" or "word/FLAGS" entry per line
+ * and add every entry to Conf with AddSpell().
+ *
+ * The word is lower-cased and cut at the first CR or LF; FLAGS is the run
+ * of ASCII letters that follows the '/'.  Returns 1 if the file cannot be
+ * opened, 0 otherwise.
+ */
+int 
+ImportDictionary(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];  
+   FILE *dict;
+
+   dict=fopen(filename,"r");
+   if(!dict)
+       return(1);
+
+   while(fgets(str,sizeof(str),dict)){
+       unsigned char *s;
+       const unsigned char *flag;
+
+       s=strchr(str,'/');
+       if(s){
+           /* split the line: the word ends at '/', the flags start after it */
+           *s=0;
+           s++;
+           flag=s;
+           /* the flags end at the first character that is not an ASCII letter */
+           while(((*s>='A')&&(*s<='Z'))||((*s>='a')&&(*s<='z')))
+               s++;
+           *s=0;
+       }else{
+           flag="";
+       }
+
+       strlower(str);
+
+       /* Dont load words if first letter is not required */
+       /* It allows to optimize loading at  search time   */
+       for(s=str;*s;s++){
+           if(*s=='\r'||*s=='\n')
+               *s=0;
+       }
+
+       AddSpell(Conf,str,flag);
+   }
+   fclose(dict);
+   return(0);
+}
+
+
+/*
+ * Look up 'word' in the sorted word list of Conf.
+ *
+ * Only the index range recorded in SpellTree for the first byte of the
+ * word is searched.  An entry matches when its word is equal to 'word' and
+ * either affixflag is 0 or the entry's flag string contains affixflag.
+ * Returns a pointer to the matching entry, or NULL if there is none.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int l,c,r,resc,resl,resr, i;
+
+   /* pick the [l, r] range of entries that start with the same byte */
+   i = (int)(*word) & 255;
+   l = Conf->SpellTree.Left[i];
+   r = Conf->SpellTree.Right[i];
+   if (l == -1) return (NULL);
+   while(l<=r){
+       /* every step tests the middle, the left and the right entry */
+       c = (l + r) >> 1;
+       resc = strcmp(Conf->Spell[c].word, word);
+       if( (resc == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[c]);
+       }
+       resl = strcmp(Conf->Spell[l].word, word);
+       if( (resl == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[l]);
+       }
+       resr = strcmp(Conf->Spell[r].word, word);
+       if( (resr == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[r]);
+       }
+       /*
+        * Narrow the range using the middle comparison; both ends also move
+        * inwards by one because they have just been tested.  When the
+        * middle word is equal but its flags did not match, only the ends
+        * move, so equal words with other flags are still visited.
+        */
+       if(resc < 0){
+           l = c + 1;
+           r--;
+       } else if(resc > 0){
+           r = c - 1;
+           l++;
+       } else {
+           l++;
+           r--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * Append one affix rule to Conf.
+ *
+ *   flag  affix flag character the rule belongs to
+ *   mask  regular expression the resulting word must match; it is stored
+ *         anchored with a trailing '$' for type 's' and with a leading '^'
+ *         for any other type
+ *   find  stored in AFFIX.find
+ *   repl  stored in AFFIX.repl (its length is cached in replen)
+ *   type  's' for a suffix rule, 'p' for a prefix rule
+ *
+ * The AFFIX buffers have a fixed size, so a rule whose mask, find or repl
+ * string does not fit is rejected with elog(ERROR) instead of being written
+ * past the end of the buffer.  elog(ERROR) is also raised when memory
+ * cannot be allocated.  Always returns 0.
+ */
+int 
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   AFFIX *Affix;
+
+   /* the mask needs one extra byte for the '^' or '$' anchor */
+   if ( strlen(mask)+2 > sizeof(Affix->mask) ||
+        strlen(find)+1 > sizeof(Affix->find) ||
+        strlen(repl)+1 > sizeof(Affix->repl) )
+       elog(ERROR,"Affix rule is too long");
+
+   if(Conf->naffixes>=Conf->maffixes){
+       if(Conf->maffixes){
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }else{
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL ) 
+           elog(ERROR,"No memory for AddAffix");
+   }
+
+   Affix = &Conf->Affix[Conf->naffixes];
+   if (type=='s') {
+       sprintf(Affix->mask,"%s$",mask);
+   } else {
+       sprintf(Affix->mask,"^%s",mask);
+   }
+   /* the regular expression is compiled lazily, on first use */
+   Affix->compile = 1;
+   Affix->flag=flag;
+   Affix->type=type;
+   
+   strcpy(Affix->find,find);
+   strcpy(Affix->repl,repl);
+   Affix->replen=strlen(repl);
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy 'src' into 'dist', dropping every space, '-' and tab character.
+ * The result is NUL-terminated; returns 'dist'.
+ */
+static char * 
+remove_spaces(char *dist,char *src){
+   char *out = dist;
+
+   for (; *src; src++) {
+       if (*src==' ' || *src=='-' || *src=='\t')
+           continue;
+       *out++ = *src;
+   }
+   *out=0;
+   return(dist);
+}
+
+
+/*
+ * Read an affix file and register every rule with AddAffix().
+ *
+ * Recognized lines:
+ *   "suffixes" / "prefixes"  select the kind of the rules that follow
+ *   "flag X"                 selects the flag of the rules that follow
+ *                            ('*' and spaces before X are skipped)
+ *   "mask > find,repl"       a rule; with only two fields present the
+ *                            second one is taken as repl and find is empty
+ * Text after '#' is ignored, rule lines are lower-cased and spaces, tabs
+ * and '-' are removed from the fields.
+ *
+ * Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           /*
+            * strchr() also matches the terminating NUL, so test for the
+            * end of the line explicitly to avoid running past the buffer
+            * contents.
+            */
+           while(*s && strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       /* rules are meaningful only inside a suffixes/prefixes section */
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* str has been parsed already and is reused as scratch space */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               /* "mask > repl": the only field after '>' is the replacement */
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+       
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+       
+   }
+   fclose(affix);
+       
+   return(0);
+}
+
+/*
+ * Sort the word list and record in SpellTree, for every first byte, the
+ * index of the first (Left) and the last (Right) word starting with it.
+ * Left is -1 for bytes no word starts with.
+ */
+void 
+SortDictionary(IspellDict * Conf){
+   int         prev = -1;
+   int         first;
+   size_t      idx;
+
+   qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+   for(idx = 0; idx < 256 ; idx++ )
+       Conf->SpellTree.Left[idx] = -1;
+
+   for(idx = 0; idx < Conf->nspell; idx++) {
+       first = (int)(*(Conf->Spell[idx].word)) & 255;
+       /* a new first byte opens a new range */
+       if (first != prev) {
+           Conf->SpellTree.Left[first] = idx;
+           prev = first;
+       }
+       Conf->SpellTree.Right[first] = idx;
+   }
+}
+
+/*
+ * Sort the affix list and build the prefix and suffix lookup tables.
+ *
+ * Prefix rules (type 'p') are indexed by the first byte of repl; all other
+ * rules are indexed by the last byte of repl, or by 0 when repl is empty.
+ * For each byte, Left/Right hold the first/last affix index, or -1.
+ */
+void 
+SortAffixes(IspellDict * Conf) {
+   int         CurLetP = -1;
+   int         CurLetS = -1;
+   int         Let;
+   int        *CurLet;
+   Tree_struct *Tree;
+   AFFIX      *Affix;
+   size_t      i;
+
+   if (Conf->naffixes > 1)
+       qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
+
+   for(i = 0; i < 256; i++) {
+       Conf->PrefixTree.Left[i] = Conf->PrefixTree.Right[i] = -1;
+       Conf->SuffixTree.Left[i] = Conf->SuffixTree.Right[i] = -1;
+   }
+
+   for(i = 0; i < Conf->naffixes; i++) {
+       Affix = &(((AFFIX*)Conf->Affix)[i]);
+
+       /* choose the table and the key byte according to the rule type */
+       if(Affix->type == 'p') {
+           Let = (int)(*(Affix->repl)) & 255;
+           Tree = &Conf->PrefixTree;
+           CurLet = &CurLetP;
+       } else {
+           Let = (Affix->replen) ? (int)(Affix->repl[Affix->replen-1]) & 255 : 0;
+           Tree = &Conf->SuffixTree;
+           CurLet = &CurLetS;
+       }
+
+       /* a new key byte opens a new range */
+       if (*CurLet != Let) {
+           Tree->Left[Let] = i;
+           *CurLet = Let;
+       }
+       Tree->Right[Let] = i;
+   }
+}
+
+/*
+ * Try to apply one suffix rule to 'word' (of length 'len').
+ *
+ * *res receives the result of comparing the tail of the word with
+ * Affix->repl.  When the tail matches, it is replaced by Affix->find; if
+ * the new word matches the rule's regular expression and is found in the
+ * dictionary with the rule's flag, a palloc'ed copy of it is returned.
+ * Returns NULL in every other case, including a regex compilation failure.
+ */
+static char * 
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) {
+   regmatch_t  subs[2]; /* workaround for apache&linux */
+   char        newword[2*MAXNORMLEN] = "";
+
+   *res = strbncmp(word, Affix->repl, Affix->replen);
+   if (*res != 0)
+       return NULL;
+
+   /* build the candidate: the word with its tail replaced by 'find' */
+   strcpy(newword, word);
+   strcpy(newword+len-Affix->replen, Affix->find);
+
+   /* compile the mask on first use */
+   if (Affix->compile) {
+       if (regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB)) {
+           /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+           regfree(&(Affix->reg));
+           return(NULL);
+       }
+       Affix->compile = 0;
+   }
+
+   if (regexec(&(Affix->reg),newword,1,subs,0) == 0 &&
+       FindWord(Conf, newword, Affix->flag))
+       return pstrdup(newword);
+
+   return NULL;
+}
+
+#define NS 1
+#define MAX_NORM 512
+/*
+ * Try to apply one prefix rule to 'word'.
+ *
+ * Returns the strncmp() result of comparing the start of the word with
+ * Affix->repl when they differ (the caller uses it to steer its search),
+ * and 0 otherwise.  When the prefix matches, it is replaced by Affix->find;
+ * if the new word matches the rule's regular expression, then
+ *   - the new word is appended to the result list if it is in the
+ *     dictionary with the rule's flag, and
+ *   - the first suffix rule recorded for byte 'pi' is tried on the new word
+ *     and its result, if any, is appended too.
+ * 'forms' is the start of the result list and '*cur' its current end; the
+ * list is kept NULL-terminated and limited to MAX_NORM-1 entries.
+ */
+static int 
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur ) {
+  regmatch_t subs[NS*2];
+  char newword[2*MAXNORMLEN] = "";
+  int err, ls, res, lres;
+  size_t newlen;
+  AFFIX *CAffix = Conf->Affix;
+  
+  res = strncmp(word, Affix->repl, Affix->replen);
+  if (res != 0) {
+    return res;
+  }
+  /* build the candidate: 'find' followed by the word without the prefix */
+  strcpy(newword, Affix->find);
+  strcat(newword, word+Affix->replen);
+
+  /* compile the mask on first use */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return (0);
+    }
+    Affix->compile = 0;
+  }
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    SPELL * curspell;
+
+    if((curspell=FindWord(Conf, newword, Affix->flag))){
+      if ((*cur - forms) < (MAX_NORM-1)) {
+   **cur =  pstrdup(newword);
+   (*cur)++; **cur = NULL;
+      }
+    } 
+    /* also try a suffix rule on the word with the prefix removed */
+    newlen = strlen(newword);
+    ls = Conf->SuffixTree.Left[pi];
+      if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
+   **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
+   if (**cur) {
+     (*cur)++; **cur = NULL;
+   }
+      }
+  }
+  return 0;
+}
+
+
+/*
+ * Find the normal (dictionary) forms of 'word'.
+ *
+ * The word is lower-cased in place.  Returns a palloc'ed, NULL-terminated
+ * array of palloc'ed strings holding at most MAX_NORM-1 forms, or NULL
+ * when no form is found, when the word is empty, or when it is longer than
+ * MAXNORMLEN.
+ */
+char ** 
+NormalizeWord(IspellDict * Conf,char *word){
+   size_t      len;
+   char      **forms;
+   char      **cur;
+   AFFIX      *Affix;
+   int         ri, pi, ipi, lp, rp, cp, ls, rs;
+   int         lres, rres, cres = 0;
+
+   len=strlen(word);
+   /*
+    * An empty word has no last character: without this test word[len-1]
+    * would read before the buffer, and a zero 'pi' would make the loop
+    * over ipi below never terminate.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return(NULL);
+
+   strlower(word);
+
+   forms=(char **) palloc(MAX_NORM*sizeof(char *));
+   cur=forms;*cur=NULL;
+
+   /* first and last byte of the word select the affix ranges to try */
+   ri = (int)(*word) & 255;
+   pi = (int)(word[len-1]) & 255;
+   Affix=(AFFIX*)Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if(FindWord(Conf, word, 0)){
+       *cur=pstrdup(word);
+       cur++;*cur=NULL;
+   }
+
+   /* Find all other NORMAL forms of the 'word' */
+
+   /* two passes: ipi == 0 (suffixes with empty repl) and ipi == pi */
+   for (ipi = 0; ipi <= pi; ipi += pi) {
+
+       /* check prefix */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp) {
+           cp = (lp + rp) >> 1;
+           cres = 0;
+           if ((cur - forms) < (MAX_NORM-1)) {
+               cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+           }
+           if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+           }
+           if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+           }
+           /* narrow the range by the middle result; both ends were tested */
+           if (cres < 0) {
+               rp = cp - 1;
+               lp++;
+           } else if (cres > 0) {
+               lp = cp + 1;
+               rp--;
+           } else {
+               lp++;
+               rp--;
+           }
+       }
+
+       /* check suffix */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       while (ls >= 0 && ls <= rs) {
+           if (  ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           ls++;
+           rs--;
+       } /* end while */
+     
+   } /* for ipi */
+
+   if(cur==forms){
+       pfree(forms);
+       return(NULL);
+   }
+   return(forms);
+}
+
+/*
+ * Release everything owned by the dictionary: compiled regular
+ * expressions, the copied words, the affix and word arrays.  The structure
+ * itself is zeroed afterwards, so it can be filled again.
+ */
+void 
+FreeIspell (IspellDict *Conf) {
+  int i;
+  AFFIX *Affix = (AFFIX *)Conf->Affix;
+
+  /* compile == 0 marks the affixes whose regex has been compiled */
+  for (i = 0; i < Conf->naffixes; i++) {
+    if (Affix[i].compile == 0) {
+      regfree(&(Affix[i].reg));
+    }
+  }
+  /* the words belong to the Spell array, so its own counter bounds the loop */
+  for (i = 0; i < Conf->nspell; i++) {
+   free( Conf->Spell[i].word );
+  }
+  free(Conf->Affix);
+  free(Conf->Spell);
+  memset( (void*)Conf, 0, sizeof(IspellDict) );
+  return;
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include 
+#include 
+
+/* one dictionary word together with its affix flags */
+typedef struct spell_struct {
+        /* the word; a strdup'ed copy made by AddSpell() */
+        char * word; 
+        /* flag characters of the word; FindWord() searches it with strchr() */
+        char flag[10];
+} SPELL;
+
+/* one affix rule */
+typedef struct aff_struct {   
+        /* flag character the rule belongs to */
+        char flag;
+        /* 's' for a suffix rule, 'p' for a prefix rule */
+        char type;
+        /* regular expression source; AddAffix() stores it with a '$' or '^' anchor */
+        char mask[33];
+        /* text put in place of 'repl' when building a candidate normal form */
+        char find[16];
+        /* text looked for at the end (suffix) or start (prefix) of a word */
+        char repl[16];
+        /* compiled form of 'mask'; valid only once 'compile' is 0 */
+        regex_t reg;
+        /* strlen(repl) */
+        size_t replen;
+        /* 1 while 'mask' still has to be compiled, 0 afterwards */
+        char compile;
+} AFFIX;
+
+/* per-byte index ranges into a sorted array; Left is -1 for an empty range */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+/* a loaded ispell dictionary */
+typedef struct {
+   /* allocated size of Affix */
+   int maffixes;
+   /* number of entries used in Affix */
+   int naffixes;
+   AFFIX * Affix;
+
+   /* number of entries used in Spell */
+   int nspell;
+   /* allocated size of Spell */
+   int mspell;
+   SPELL   *Spell;
+   /* Spell ranges by first byte of the word (built by SortDictionary) */
+   Tree_struct SpellTree;
+   /* Affix ranges of prefix rules by first byte of repl (built by SortAffixes) */
+   Tree_struct PrefixTree;
+   /* Affix ranges of suffix rules by last byte of repl (built by SortAffixes) */
+   Tree_struct SuffixTree;
+
+} IspellDict;
+
+char ** NormalizeWord(IspellDict * Conf,char *word);
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#define CS_WAITKEY 0
+#define CS_INKEY   1
+#define CS_WAITEQ  2
+#define CS_WAITVALUE   3
+#define CS_INVALUE 4
+#define CS_IN2VALUE    5
+#define CS_WAITDELIM   6
+#define CS_INESC   7
+#define CS_IN2ESC  8
+
+/*
+ * Return a palloc'ed, NUL-terminated copy of the first 'len' bytes of
+ * 'ptr' with backslash escapes resolved: every backslash is dropped and
+ * the character after it is copied as is.  A backslash that is the last
+ * character of the input is dropped.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+   char *res=palloc(len+1), *cptr;
+   memcpy(res,ptr,len);
+   res[len]='\0';
+   /* unescape in place: cptr writes, ptr reads, both inside res */
+   cptr = ptr = res;
+   while(*ptr) {
+       if ( *ptr == '\\' ) {
+           ptr++;
+           /*
+            * Trailing backslash: stop here, otherwise the increment below
+            * would step over the terminator and the loop would go on
+            * reading past the end of the buffer.
+            */
+           if ( *ptr == '\0' )
+               break;
+       }
+       *cptr=*ptr; ptr++; cptr++;
+   }
+   *cptr='\0';
+
+   return res;
+}
+
+/*
+ * Parse a configuration string of the form
+ *     key = value, key = "quoted value", ...
+ * into a palloc'ed array of Map entries stored in *m.  The array is
+ * zero-filled, so an entry whose key is NULL follows the last parsed pair.
+ *
+ * Keys consist of alphabetic characters.  A value is either enclosed in
+ * double quotes or runs up to the next whitespace or comma; in both forms a
+ * backslash escapes the next character.  Syntax errors are reported with
+ * elog(ERROR).
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+   Map *mptr;
+   char *ptr=VARDATA(in), *begin=NULL;
+   /* an int: a char counter would overflow on long inputs */
+   int num=0;
+   int state=CS_WAITKEY;
+
+   /* the number of commas gives an upper bound for the number of pairs */
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if ( *ptr==',' ) num++;
+       ptr++;
+   }
+
+   *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+   memset(mptr, 0, sizeof(Map)*(num+2) );
+   ptr=VARDATA(in);
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       /* the ctype functions require an unsigned char value */
+       unsigned char ch = (unsigned char) *ptr;
+
+       if (state==CS_WAITKEY) {
+           if (isalpha(ch)) {
+               begin=ptr;
+               state=CS_INKEY;
+           } else if ( !isspace(ch) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int)(ptr-VARDATA(in)), *ptr);
+       } else if (state==CS_INKEY) {
+           if ( isspace(ch) ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITEQ;
+           } else if ( *ptr=='=' ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITVALUE;
+           } else if ( !isalpha(ch) ) 
+               elog(ERROR,"Syntax error in position %d near '%c'", (int)(ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITEQ ) {
+           if ( *ptr=='=' )
+               state=CS_WAITVALUE;
+           else if ( !isspace(ch) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int)(ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITVALUE ) {
+           if ( *ptr=='"' ) {
+               begin=ptr+1;
+               state=CS_INVALUE;
+           } else if ( !isspace(ch) ) {
+               begin=ptr;
+               state=CS_IN2VALUE;
+           }
+       } else if ( state==CS_INVALUE ) {
+           /* quoted value: ends at the closing quote */
+           if ( *ptr=='"' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_INESC;
+       } else if ( state==CS_IN2VALUE ) {
+           /* unquoted value: ends at whitespace or a comma */
+           if ( isspace(ch) || *ptr==',' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               /* resume in the unquoted state after the escaped character */
+               state=CS_IN2ESC;
+       } else if ( state==CS_WAITDELIM ) {
+           if ( *ptr==',' ) 
+               state=CS_WAITKEY; 
+           else if ( !isspace(ch) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int)(ptr-VARDATA(in)), *ptr);
+       } else if ( state == CS_INESC ) {
+           /* the escaped character is skipped here; nstrdup() unescapes it */
+           state=CS_INVALUE;
+       } else if ( state == CS_IN2ESC ) {
+           state=CS_IN2VALUE;
+       } else 
+           elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int)(ptr-VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may be ended by the end of the input */
+   if (state==CS_IN2VALUE) {
+       mptr->value = nstrdup(begin, ptr-begin);
+       mptr++;
+   } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) ) 
+       elog(ERROR,"Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery is parsed with the text parser
+ * and morphology is applied as well.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored in a plain (flattened) view: in the array of
+ * nodes the right child is always the next item and the left child is at
+ * position item+item->left
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR   2
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ *
+ * Nodes are created by pushquery(), which links every new node in front
+ * of the list kept in QPRS_STATE.str.
+ */
+typedef struct NODE
+{
+   /* weight mask of an operand, as produced by get_weight() */
+   int2        weight;
+   /* node kind: VAL, OPR, VALTRUE, ... */
+   int2        type;
+   /* operator character for OPR; crc32 of the operand text for values */
+   int4        val;
+   /* offset of the operand text inside the operand buffer */
+   int2        distance;
+   /* length of the operand text */
+   int2        length;
+   /* next node of the list (the one pushed before this one) */
+   struct NODE *next;
+}  NODE;
+
+/* state of the query parser */
+typedef struct
+{
+   /* current position in the query string */
+   char       *buf;
+   /* WAITOPERAND or WAITOPERATOR */
+   int4        state;
+   /* number of currently open parentheses */
+   int4        count;
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   /* allocated size of op */
+   int4        lenop;
+   /* total length of the stored operands, terminators included */
+   int4        sumlen;
+   /* buffer holding the NUL-separated operand texts */
+   char       *op;
+   /* position in op where the next operand will be written */
+   char       *curop;
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional ":abcd" weight suffix at 'buf'.
+ *
+ * *weight receives a bit mask (a = 8, b = 4, c = 2, d = 1, letters are
+ * case-insensitive) or 0 if there is no suffix.  Returns a pointer to the
+ * first character after the suffix.
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+   *weight = 0;
+
+   if ( *buf != ':' )
+       return buf;
+
+   for (buf++; *buf; buf++) {
+       int letter = tolower(*buf);
+
+       if ( letter == 'a' )
+           *weight |= 1<<3;
+       else if ( letter == 'b' )
+           *weight |= 1<<2;
+       else if ( letter == 'c' )
+           *weight |= 1<<1;
+       else if ( letter == 'd' )
+           *weight |= 1;
+       else
+           break;      /* first character that is not a weight letter */
+   }
+
+   return buf;
+}
+
+/*
+ * get token from query string
+ *
+ * Returns the token type: OPR (operator character in *val), OPEN, CLOSE,
+ * VAL (operand text in *strval, its length in *lenval and its weight mask
+ * in *weight), END at the end of the string, or ERR on a syntax error.
+ * state->state tells whether an operand or an operator is expected next.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               if (*(state->buf) == '!')
+               {
+                   /* negation is a prefix operator: an operand is still expected */
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* let the tsvector value parser read the operand */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       /* continue after the operand and its optional weight */
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   /* more closing than opening parentheses is an error */
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* unclosed parentheses at the end of the string */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       /* skip the space that was just examined */
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Add a new node in front of the reverse polish notation list of 'state'.
+ * Raises an error if 'distance' or 'lenval' is out of range.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   /* both values are stored in int2 fields of the node */
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+
+   node->weight = weight;
+   node->type = type;
+   node->val = val;
+   node->distance = distance;
+   node->length = lenval;
+
+   /* link the node as the new head of the list */
+   node->next = state->str;
+   state->str = node;
+   state->num++;
+}
+
+/*
+ * Operand callback used for tsquery parsing: the operand is stored exactly
+ * as written.  A node with the crc32 of the text is pushed, and the text,
+ * followed by a NUL, is appended to the operand buffer of 'state'.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             state->curop - state->op, lenval, weight);
+
+   /* double the operand buffer until the text and its terminator fit */
+   while (state->curop - state->op + lenval + 1 >= state->lenop)
+   {
+       int4        used = state->curop - state->op;
+
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+       /* the buffer may have moved: recompute the write position */
+       state->curop = state->op + used;
+   }
+
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop[lenval] = '\0';
+   state->curop += lenval + 1;
+   state->sumlen += lenval + 1;
+   return;
+}
+
+/*
+ * Operand callback used for morph parsing: the operand text is run through
+ * the text parser of the configuration state->cfg_id, and every resulting
+ * word is pushed as a value; the words are joined with '&' operators.
+ * If parsing yields no word at all, a VALTRUE node is pushed instead.
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT         prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       /* every word after the first one is AND-ed with the previous ones */
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }   
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 ) 
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens with gettoken_query() and pushes nodes with pushquery();
+ * operands are pushed through the 'pushval' callback.  Pending operators
+ * are kept on a local stack of STACKDEPTH entries.  The function calls
+ * itself for every parenthesized sub-expression and returns END when that
+ * sub-expression (or the whole query) has been processed.
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* emit the pending '&' and '!' operators right after the operand */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               /* a '|' is emitted at once when the stack is not empty,
+                * any other operator waits on the stack */
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* process the parenthesized sub-expression recursively */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* end of the sub-expression: flush the remaining operators */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of the query: flush the remaining operators */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/*
+ * Data handed to checkcondition_str() through the 'checkval' argument of
+ * TS_execute(); filled in by exectsq().
+ */
+typedef struct
+{
+   /* first entry of the tsvector's WordEntry array */
+   WordEntry  *arrb;
+   /* one past the last entry of that array */
+   WordEntry  *arre;
+   /* string area of the tsvector; WordEntry.pos is an offset into it */
+   char       *values;
+   /* operand area of the query; ITEM.distance is an offset into it */
+   char       *operand;
+}  CHKVAL;
+
+/*
+ * Compare the word of a tsvector entry with a query operand: the shorter
+ * string is the smaller one, strings of equal length are compared with
+ * strncmp().
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   const char *word = &(chkval->values[ptr->pos]);
+   const char *operand = &(chkval->operand[item->distance]);
+
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(word, operand, item->length);
+}
+
+/*
+ * check weight info
+ *
+ * Returns true if at least one position stored for the word 'val' has a
+ * weight that is selected by the weight mask of the query item.  The
+ * positions follow the (short-aligned) word text in the string area: a
+ * uint16 count and then the WordEntryPos array.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       /* the item's mask has one bit per weight value */
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false; 
+}
+
+/*
+ * Binary search for the query operand 'val' in the sorted WordEntry array
+ * described by 'checkval' (a CHKVAL).  When the word is found and both the
+ * operand has a weight mask and the entry has positions, the weights are
+ * checked as well.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *chk = (CHKVAL *) checkval;
+   WordEntry  *low = chk->arrb;
+   WordEntry  *high = chk->arre;
+
+   /* Loop invariant: low <= val < high */
+
+   while (low < high)
+   {
+       WordEntry  *middle = low + (high - low) / 2;
+       int         cmp = ValCompare(chk, middle, val);
+
+       if (cmp < 0)
+           low = middle + 1;
+       else if (cmp > 0)
+           high = middle;
+       else
+       {
+           if (val->weight && middle->haspos)
+               return checkclass_str(chk, middle, val);
+           return true;
+       }
+   }
+
+   return (false);
+}
+
+/*
+ * Evaluate the boolean query whose root is 'curitem'.
+ *
+ * Operands are tested with the 'chkcond' callback.  For an operator the
+ * right operand is the next item and the left one is at curitem->left.
+ * When 'calcnot' is false a negation is not evaluated and counts as true.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   switch (curitem->val)
+   {
+       case (int4) '!':
+           if (!calcnot)
+               return true;
+           return (TS_execute(curitem + 1, checkval, calcnot, chkcond)) ? false : true;
+
+       case (int4) '&':
+           /* the right operand is evaluated only if the left one is true */
+           if (!TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+               return false;
+           return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+
+       default:                /* |-operator */
+           /* the right operand is evaluated only if the left one is false */
+           if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+               return true;
+           return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+   }
+}
+
+/*
+ * boolean operations
+ *
+ * rexectsq is exectsq with the two arguments given in reverse order.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(1);
+   Datum       second = PG_GETARG_DATUM(0);
+
+   return DirectFunctionCall2(exectsq, first, second);
+}
+
+/*
+ * Test whether the tsvector (argument 0) satisfies the query (argument 1).
+ * The result is false when either of them is empty.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   CHKVAL      chkval;
+   bool        result = false;
+
+   if (val->size && query->size)
+   {
+       /* describe the vector's words and the query's operands */
+       chkval.arrb = ARRPTR(val);
+       chkval.arre = chkval.arrb + val->size;
+       chkval.values = STRPTR(val);
+       chkval.operand = GETOPERAND(query);
+
+       result = TS_execute(GETQUERY(query), &chkval, true, checkcondition_str);
+   }
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * find left operand in polish notation view
+ *
+ * Walks the item array starting at ptr[*pos], advancing *pos past the
+ * whole subexpression, and fills in the "left" field of every item:
+ *   - operands (VAL / VALTRUE) get left = 0;
+ *   - '!' gets left = 1 (its single operand is the next item);
+ *   - any other operator gets the distance from itself to the item that
+ *     follows its first (immediately adjacent) operand subexpression.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+   if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
+   {
+       ptr[*pos].left = 0;
+       (*pos)++;
+   }
+   else if (ptr[*pos].val == (int4) '!')
+   {
+       ptr[*pos].left = 1;
+       (*pos)++;
+       findoprnd(ptr, pos);
+   }
+   else
+   {
+       ITEM       *curitem = &ptr[*pos];
+       int4        tmp = *pos;     /* index of this operator */
+
+       (*pos)++;
+       /* skip the operand that directly follows the operator ... */
+       findoprnd(ptr, pos);
+       /* ... so *pos now points at the other operand */
+       curitem->left = *pos - tmp;
+       findoprnd(ptr, pos);
+   }
+}
+
+
+/*
+ * input
+ *
+ * Parse the query text in buf and build a palloc'd QUERYTYPE.
+ *
+ * pushval is the callback handed to makepol() for storing operands;
+ * cfg_id is recorded in the parser state for it.  Raises an error if the
+ * parse produced no items.  The result holds state.num items followed by
+ * the operand string area of state.sumlen bytes.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   /* (copies each node of the state.str list into the array, freeing it) */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   /* NOTE(review): pbuf is a fixed 16384-byte buffer filled with sprintf */
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * in without morphology
+ *
+ * Parses the string argument with queryin(), using pushval_asis as the
+ * operand callback and configuration id 0.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   char       *src = (char *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *parsed = queryin(src, pushval_asis, 0);
+
+   PG_RETURN_POINTER(parsed);
+}
+
+/*
+ * out function
+ *
+ * INFIX is the working state of the infix printer:
+ *   curpol - next query item to print
+ *   buf    - start of the palloc'd output buffer
+ *   cur    - current write position inside buf
+ *   op     - base of the query's operand string area
+ *   buflen - allocated size of buf
+ */
+typedef struct
+{
+   ITEM       *curpol;
+   char       *buf;
+   char       *cur;
+   char       *op;
+   int4        buflen;
+}  INFIX;
+
+/*
+ * Grow inf->buf (doubling buflen) until addsize more bytes plus a
+ * terminator fit after inf->cur; inf->cur is re-based after repalloc.
+ */
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the subexpression starting at in->curpol into in->buf and leaves
+ * in->curpol just past it.  "first" is true for the outermost expression
+ * (and for an expression already parenthesized after '!'); when it is
+ * false an '|' expression is wrapped in "( ... )".
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       /* worst case: every char escaped, two quotes, ":ABCD" */
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           /* a quote inside the operand is emitted as \' */
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       /* weight bits 3..0 are printed as the letters A..D after a ':' */
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       /* parenthesize the operand of '!' when it is itself an operator */
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm;
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       /*
+        * The right operand is stored first, so it is rendered into a
+        * temporary buffer and appended after the left operand below.
+        */
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function: render a query as a palloc'd C string in infix form.
+ * An empty query (size == 0) is rendered as the empty string.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+
+   if (query->size == 0)
+   {
+       char       *b = palloc(1);
+
+       *b = '\0';
+       /* release the detoasted copy on this path too */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(b);
+   }
+   nrm.curpol = GETQUERY(query);
+   nrm.buflen = 32;
+   nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+   *(nrm.cur) = '\0';
+   nrm.op = GETOPERAND(query);
+   infix(&nrm, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(nrm.buf);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * Returns a text value: empty for an empty query, "T" when
+ * clean_NOT_v2() reduces the query to nothing, otherwise the infix
+ * rendering of the reduced query.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+   text       *res;
+   ITEM       *q;
+   int4        len;
+
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       /* release the detoasted copy on this path too */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(res);
+   }
+
+   q = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (!q)
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+   else
+   {
+       int4        outlen;
+
+       nrm.curpol = q;
+       nrm.buflen = 32;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+       *(nrm.cur) = '\0';
+       nrm.op = GETOPERAND(query);
+       infix(&nrm, true);
+
+       /* text values carry no terminator: copy exactly the bytes written */
+       outlen = nrm.cur - nrm.buf;
+       res = (text *) palloc(outlen + VARHDRSZ);
+       VARATT_SIZEP(res) = outlen + VARHDRSZ;
+       memcpy(VARDATA(res), nrm.buf, outlen);
+       pfree(nrm.buf);
+       pfree(q);
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * Build a query from text (argument 1) using the configuration whose id
+ * is argument 0.  The text is parsed with pushval_morph as the operand
+ * callback and the item array is then passed through clean_fakeval_v2().
+ * If that leaves nothing, an empty query (size 0) is returned.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text    *in = PG_GETARG_TEXT_P(1);
+   char *str;
+   QUERYTYPE  *query;
+   ITEM       *res;
+   int4        len;
+
+   str=text2char(in);
+   PG_FREE_IF_COPY(in,1);
+
+   query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+   res = clean_fakeval_v2(GETQUERY(query), &len);
+   if (!res)
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+       PG_RETURN_POINTER(query);
+   }
+   /*
+    * NOTE(review): only the first len items are overwritten; query->size
+    * and query->len keep their original values -- confirm this is intended
+    * when len is smaller than the original item count.
+    */
+   memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM));
+   pfree(res);
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * Same as to_tsquery(), but the configuration is given by name
+ * (argument 0, text) and resolved with name2id_cfg(); argument 1 is the
+ * query text.
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+   
+   /* name was fetched from argument 0, so compare against argument 0 */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * Same as to_tsquery(), using the configuration id returned by
+ * get_currcfg(); argument 0 is the query text.
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum   cfg = Int32GetDatum( get_currcfg() );
+   Datum   txt = PG_GETARG_DATUM(0);
+
+   PG_RETURN_DATUM( DirectFunctionCall2(to_tsquery, cfg, txt) );
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * item in polish notation with back link
+ * to left operand
+ *
+ * type     - item kind (VAL, OPR, ... see the codes below)
+ * weight   - bit mask of weight classes; bits 3..0 print as A..D
+ * left     - for operators, offset from this item to its other operand
+ *            (0 for operands, 1 for '!')
+ * val      - for operators, the operator character ('!', '&', '|')
+ * length / distance - length of the operand string and its offset inside
+ *            the query's operand area
+ */
+typedef struct ITEM
+{
+   int8        type;
+   int8        weight;
+   int2        left;
+   int4        val;
+   /* user-friendly value, must correlate with WordEntry */
+   uint32  
+       unused:1,
+       length:11,
+       distance:20;
+}  ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ *
+ * len  - total size in bytes of the stored value
+ * size - number of ITEMs in the array (0 means an empty query)
+ * data - start of the ITEM array; use GETQUERY()/GETOPERAND() to reach
+ *        the two variable-length parts
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  QUERYTYPE;
+
+/* size of the fixed header (len + size) that precedes the ITEM array */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+/* total bytes needed for "size" items plus "lenofoperand" operand bytes */
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+/* pointer to the ITEM array; fully parenthesized so GETQUERY(x)[i] works */
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+/* pointer to the operand string area that follows the ITEM array */
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+/* true for the characters that have syntactic meaning in a query */
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+/*
+ * Item / token kind codes.  VAL, OPR and VALTRUE are tested against
+ * ITEM.type by the query code; the remaining codes are presumably used by
+ * the query parser -- confirm in query.c.
+ */
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevation
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include <math.h>
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+#define DEF_NORM_METHOD    0
+
+/*
+ * Returns a weight of a word collocation
+ *
+ * w is the distance between two positions.  Distances above 100 get a
+ * fixed, tiny weight; otherwise the weight falls off as
+ * 1 / (1.005 + 0.05 * exp(w / 1.5 - 2)).
+ */
+static float4
+word_distance(int4 w)
+{
+   double      exponent;
+
+   if (w > 100)
+       return 1e-30;
+
+   exponent = ((float4) w) / 1.5 - 2;
+   return 1.0 / (1.005 + 0.05 * exp(exponent));
+}
+
+/*
+ * Length of a tsvector for normalization: the sum over all entries of
+ * the number of stored positions, counting an entry without positions
+ * as one.
+ */
+static int
+cnt_length(tsvector *t)
+{
+   WordEntry  *entry;
+   WordEntry  *stop = (WordEntry *) STRPTR(t);
+   int         total = 0;
+
+   for (entry = ARRPTR(t); entry < stop; entry++)
+   {
+       int         npos = POSDATALEN(t, entry);
+
+       total += (npos == 0) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Order a tsvector entry against a query operand: shorter strings sort
+ * first, equal lengths are decided by strncmp over the operand's length.
+ * eval / qval are the string areas of the vector and of the query.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary search of the tsvector's entry array for the query operand
+ * "item".  Returns the matching entry, or NULL if there is none.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+   WordEntry  *lo = ARRPTR(t);
+   WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+   /* search the half-open range [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+           return mid;
+   }
+
+   return NULL;
+}
+
+/*
+ * Substitute position data for entries that carry no position info
+ * (entry->haspos is false).  Users first store lengthof(POSNULL)-1 into
+ * the leading uint16 of the array and then use POSNULL+1 as the position
+ * list, so only the second element acts as a position.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank for a query whose first item is the '&' operator.
+ *
+ * For every pair of distinct query operands found in the document, each
+ * pair of their positions contributes
+ *     sqrt( w[class1] * w[class2] * word_distance(dist) )
+ * where dist is the absolute difference of the two positions.  Equal
+ * positions are ignored unless one side uses the POSNULL substitute, in
+ * which case dist is taken as MAXENTRYPOS.  Contributions are combined as
+ * res = 1 - (1 - res) * (1 - contribution).
+ *
+ * Returns -1.0 if nothing contributed.
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   memset(pos,0,sizeof(uint16*) * q->size);
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       /* pos[i] points at a uint16 count followed by the positions */
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       /* pair this operand with every earlier operand that was found */
+       for( k=0; k<i; k++ ) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw; 
+                       if ( !dist ) dist=MAXENTRYPOS;  
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res; 
+}
+
+/*
+ * Rank for a query that is not headed by '&'.
+ *
+ * Every position of every query operand found in the document
+ * contributes its weight w[class]; contributions are combined as
+ * res = 1 - (1 - res) * (1 - weight).  Entries without position info use
+ * the POSNULL substitute.  Returns -1.0 if nothing contributed.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Compute the rank of document t for query q using weights w[].
+ *
+ * Returns 0 for an empty document or query.  Otherwise dispatches to
+ * calc_rank_and() when the query's first item is the '&' operator and to
+ * calc_rank_or() in every other case; a negative result ("nothing
+ * found") becomes 1e-20.
+ *
+ * method selects length normalization:
+ *   0 - none
+ *   1 - divide by log(document length)
+ *   2 - divide by document length
+ * Any other value raises an error.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+   int doclen;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   if ( res < 0 )
+       res = 1e-20;
+
+   switch(method) {
+       case 0: break;
+       case 1:
+           doclen = cnt_length(t);
+           /* log(1) is 0: skip normalization rather than divide by zero */
+           if ( doclen > 1 )
+               res /= log((float)doclen);
+           break;
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   return res;
+}
+
+/*
+ * rank(weights float4[], tsvector, query [, method int4])
+ *
+ * The weight array must be one-dimensional and have at least
+ * lengthof(weights) elements.  A negative element selects the built-in
+ * default for that slot; an element above 1.0 raises an error.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 ) 
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+        elog(ERROR,"Array of weight is too short");
+
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 ) 
+           elog(ERROR,"Weight out of range");
+   } 
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method); 
+       
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank(tsvector, query [, method int4]) using the built-in weights.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int method;
+   float score;
+
+   /* the normalization method is an optional third argument */
+   method = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+
+   score = calc_rank(weights, txt, query, method);
+
+   PG_FREE_IF_COPY(txt, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_FLOAT4(score);
+}
+
+
+/*
+ * One occurrence of a query operand in the document: the query item and
+ * the position at which it occurs.
+ */
+typedef struct {
+   ITEM    *item;
+   int32   pos;
+} DocRepresentation;
+
+/*
+ * qsort comparator ordering DocRepresentation by ascending position.
+ * Equal positions compare as 0, as the qsort contract requires.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+/* a window of "len" consecutive DocRepresentation entries starting at doc */
+typedef struct {
+   DocRepresentation *doc;
+   int len;
+}  ChkDocR;
+
+/*
+ * TS_execute callback: an operand is satisfied when the window passed in
+ * checkval contains an entry that refers to that very query item.
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *window = (ChkDocR*)checkval;
+   int idx;
+
+   for(idx=0; idx < window->len; idx++) {
+       if ( window->doc[idx].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next cover in doc[0..len), searching from index *pos.
+ *
+ * Pass 1: for every operand of the query take its first occurrence at or
+ * after *pos; *q becomes the largest such position and lastpos the index
+ * where it was found.  If nothing is found the search ends.  If *q equals
+ * the value it had on entry, *pos is advanced by one and the search is
+ * repeated.
+ * Pass 2: for every operand scan backwards from lastpos down to *pos for
+ * its nearest occurrence; *p becomes the smallest such position and f
+ * the entry holding it.
+ * The window [f, doc+lastpos] is then tested with TS_execute() (with
+ * calcnot false) and *pos is moved just past f.  Returns true with the
+ * cover in *p..*q on success, otherwise continues searching recursively.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               } 
+               break;
+           } 
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   } 
+
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+ 
+   /* *p is only lowered from its initial maximum when f was assigned */
+   if ( *p<=*q ) {
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { 
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ 
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q); 
+   }
+ 
+   return false;
+}
+
+/*
+ * Build the position-sorted list of all occurrences of the query's
+ * operands in txt.
+ *
+ * Each (query item, position) pair becomes one DocRepresentation; entries
+ * without position info use the POSNULL substitute.  The number of
+ * entries is stored in *doclen.  Returns a palloc'd array, or NULL (with
+ * *doclen == 0) when no operand occurs in the document.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until this entry's positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+   
+   if ( cur>0 ) {
+       if ( cur>1 ) 
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+   
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd(K int4, tsvector, query [, method int4])
+ *
+ * Sums a score over the covers reported by Cover(): a cover spanning
+ * q-p+1 positions scores 1.0 when that span is at most K and K/span
+ * otherwise.  K <= 0 selects the default of 4.  Returns 0 when no query
+ * operand occurs in the document.  The optional method selects the same
+ * normalization as in calc_rank(): 0 none, 1 log(length), 2 length.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len,cur;
+   int doclen;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;    
+   while( Cover(doc, len, query, &cur, &p, &q) ) 
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   switch(method) {
+       case 0: break;
+       case 1:
+           doclen = cnt_length(txt);
+           /* log(1) is 0: skip normalization rather than divide by zero */
+           if ( doclen > 1 )
+               res /= log((float)doclen);
+           break;
+       case 2: res /= (float)cnt_length(txt); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd(tsvector, query [, method int4]): calls rank_cd() with K = -1,
+ * which makes it use its default K.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum   method;
+
+   if ( PG_NARGS() == 3 )
+       method = PG_GETARG_DATUM(2);
+   else
+       method = Int32GetDatum(DEF_NORM_METHOD);
+
+   PG_RETURN_DATUM( DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   ));
+}
+
+/**************debug*************/
+
+/*
+ * One word occurrence of the document as used by get_covers():
+ *   w / len        - the word's bytes inside the tsvector string area
+ *   pos            - its position in the document
+ *   start / finish - number of the cover that begins / ends at this word
+ *                    (0 if none)
+ */
+typedef struct {
+   char    *w;
+   int2    len;
+   int2    pos;
+   int2    start;
+   int2    finish;
+} DocWord;
+
+/*
+ * qsort comparator ordering DocWord by ascending position.
+ * Equal positions compare as 0, as the qsort contract requires.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * Debug helper: get_covers(tsvector, query) returns the document's words
+ * in position order as text, with "{N " written before the first word of
+ * the N-th cover and "}N " after its last word.
+ *
+ * Returns empty text when no query operand occurs in the document, and
+ * raises an error if any entry of the tsvector lacks position info.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences in the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+        dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates "word + space" bytes */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark the first and last word of every cover */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* test the bound before dereferencing dwptr */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/*
+ * Binary-tree form of a query.  valnode points at the query ITEM this
+ * node stands for; operators have a right child and, unless the operator
+ * is '!', a left child.  Operand nodes have neither.
+ */
+typedef struct NODE
+{
+   struct NODE *left;
+   struct NODE *right;
+   ITEM       *valnode;
+}  NODE;
+
+/*
+ * make query tree from plain view of query
+ *
+ * Builds a palloc'd NODE for the item at "in" and, for operators,
+ * recursively for its operands: the right child from in + 1 and, except
+ * for '!', the left child from in + in->left.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   node->valnode = in;
+   node->left = NULL;
+   node->right = NULL;
+
+   /* operands are leaves */
+   if (in->type != OPR)
+       return node;
+
+   node->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       node->left = maketree(in + in->left);
+
+   return node;
+}
+
+/*
+ * Output state for flattening a tree back into an ITEM array:
+ * ptr is the array, len its allocated item count, cur the next free slot.
+ */
+typedef struct
+{
+   ITEM       *ptr;
+   int4        len;
+   int4        cur;
+}  PLAINTREE;
+
+/*
+ * Append the subtree rooted at node to state->ptr in the plain layout
+ * (operator, right operand, left operand), recomputing each operator's
+ * "left" offset, and free every node that has been written out.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   /* double the array when it is full */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+   memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM));
+   if (node->valnode->type == VAL)
+       state->cur++;
+   else if (node->valnode->val == (int4) '!')
+   {
+       state->ptr[state->cur].left = 1;
+       state->cur++;
+       plainnode(state, node->right);
+   }
+   else
+   {
+       int4        cur = state->cur;   /* slot of this operator */
+
+       state->cur++;
+       plainnode(state, node->right);
+       /* the left operand starts right after the right subtree */
+       state->ptr[cur].left = state->cur - cur;
+       plainnode(state, node->left);
+   }
+   pfree(node);
+}
+
+/*
+ * make plain view of tree from 'normal' view of tree
+ *
+ * Returns a palloc'd ITEM array and stores its item count in *len.
+ * A NULL root, or a root that is neither VAL nor OPR, gives NULL with
+ * *len set to 0.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   pl;
+
+   pl.ptr = NULL;
+   pl.len = 16;
+   pl.cur = 0;
+
+   if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
+       plainnode(&pl, root);
+   }
+
+   *len = pl.cur;
+   return pl.ptr;
+}
+
+/*
+ * Free a tree of NODEs (the ITEMs they point at are not touched).
+ * A NULL node is accepted and ignored.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive calls cope with missing children themselves */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * clean tree for ! operator.
+ * It's useful for debug, but in
+ * other case, such view is used with search in index.
+ * Operator ! always return TRUE
+ *
+ * Returns the reduced subtree, or NULL when the subtree is always true:
+ *   - an operand is kept as is;
+ *   - a '!' subtree is freed and becomes NULL;
+ *   - '|' becomes NULL as soon as either side is NULL;
+ *   - '&' drops a NULL side and becomes NULL only if both sides are NULL.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* if the left side is NULL the right side is freed unprocessed */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           /* only the right side survives: it replaces this node */
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           /* only the left side survives: it replaces this node */
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a palloc'd copy of the query at ptr with every '!' subexpression
+ * removed (see clean_NOT_intree), storing its item count in *len.
+ * Returns NULL with *len == 0 when nothing remains.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree = maketree(ptr);
+   NODE       *reduced = clean_NOT_intree(tree);
+
+   return plaintree(reduced, len);
+}
+
+/* constant-folding outcome of a subtree that has been removed */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Clean query tree from values which is always in
+ * text (stopword)
+ *
+ * Returns the reduced subtree.  When the subtree folds to a constant it
+ * is freed, NULL is returned and *result is set to V_TRUE or V_FALSE;
+ * otherwise *result is left untouched.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       /* a stop word is always true */
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* negation of a constant is the opposite constant */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           /* true | x is true */
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           /* false | x is x */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       /* every other operator is handled as '&' */
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           /* false & x is false */
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           /* true & x is x */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a palloc'd copy of the query at ptr with stop-word operands
+ * folded away (see clean_fakeval_intree), storing its item count in *len.
+ * If the whole query folds to a constant a NOTICE is issued and NULL is
+ * returned with *len == 0.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        outcome = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &outcome);
+
+   if (outcome == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/*
+ * Query rewriting entry points.  Both take an array of query items and
+ * return a rewritten array, storing its length in *len.
+ * NOTE(review): clean_NOT_v2 is not defined in this part of the patch;
+ * presumably it simplifies NOT operators - confirm in rewrite.c.
+ */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+/* Returns NULL with *len == 0 when the whole query collapses to a constant. */
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/*
+ * qsort()/bsearch() comparator: orders two SNMapEntry records by their
+ * key strings using strcmp().
+ */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *lhs = (const SNMapEntry *) a;
+   const SNMapEntry *rhs = (const SNMapEntry *) b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Append a (key, value) pair to the map and keep the entry list sorted.
+ *
+ * The entry array grows by doubling (starting at 16 slots) and the key is
+ * stored as a private strdup() copy.  Allocation failures are reported
+ * with elog(ERROR).  The list is re-sorted after every insertion so that
+ * findSNMap() can use bsearch().
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if ( map->len >= map->reallen ) {
+       int newcap = 16;
+       SNMapEntry *grown;
+
+       if ( map->reallen )
+           newcap = 2 * map->reallen;
+       grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newcap);
+       if ( grown == NULL )
+           elog(ERROR, "No memory");
+       map->reallen = newcap;
+       map->list = grown;
+   }
+
+   /* Fill the first free slot. */
+   slot = map->list + map->len;
+   slot->key = strdup(key);
+   if ( slot->key == NULL )
+       elog(ERROR, "No memory");
+   slot->value = value;
+   map->len++;
+
+   if ( map->len > 1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Same as addSNMap(), but the key is given as a text value.  The key is
+ * converted with text2char(); the temporary C string is released with
+ * pfree() once addSNMap() has made its own copy.
+ */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *cstr;
+
+   cstr = text2char( key );
+   addSNMap(map, cstr, value);
+   pfree(cstr);
+}
+
+/*
+ * Binary-search the map for key.  Returns the stored Oid, or 0 when the
+ * map is empty or the key is not present.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if ( !map->list || map->len==0 )
+       return 0;
+
+   /* Only the key of the probe entry is looked at by the comparator. */
+   probe.key = key;
+   probe.value = 0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( hit == NULL )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Same as findSNMap(), but the key is given as a text value.
+ *
+ * The key is converted with text2char() and the temporary C string is
+ * released with pfree() before returning.  Returns the stored Oid, or 0
+ * when the key is not found.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   /* Keep the lookup result in an Oid: a signed int cannot hold every Oid value. */
+   Oid res;
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release every key string and the entry array, then reset the whole map
+ * structure to zeroes so it can be reused.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *cur = map->list;
+
+       /* map->len is counted down in place, one entry per iteration. */
+       for ( ; map->len; map->len--, cur++ ) {
+           if ( cur->key )
+               free(cur->key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One map entry: a key string owned by the map (strdup() copy) and its Oid. */
+typedef struct {
+   char    *key;
+   Oid value;
+} SNMapEntry;
+
+/*
+ * String -> Oid map kept as an array sorted by key.
+ *   len     - number of entries in use
+ *   reallen - number of entries allocated in list
+ *   list    - entry array (malloc'ed; NULL while the map is empty)
+ * A zero-filled SNMap is a valid empty map (this is the state freeSNMap()
+ * leaves behind).
+ */
+typedef struct {
+   int len;
+   int reallen;
+   SNMapEntry  *list;
+} SNMap;
+
+/* Add a pair; the key is copied.  Raises elog(ERROR) when out of memory. */
+void addSNMap( SNMap *map, char *key, Oid value );
+/* As addSNMap(), with the key given as a text value. */
+void addSNMap_t( SNMap *map, text *key, Oid value );
+/* Look a key up; returns 0 when it is not present. */
+Oid findSNMap( SNMap *map, char *key );
+/* As findSNMap(), with the key given as a text value. */
+Oid findSNMap_t( SNMap *map, text *key );
+/* Free all keys and the entry array and zero the map. */
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a stemmer environment.
+ *
+ *   S_size - number of string variables (each created with create_s())
+ *   I_size - number of integer variables
+ *   B_size - number of boolean variables (one symbol each)
+ *
+ * All storage is zero-initialised by calloc().  Returns the new
+ * environment, or a null pointer when any allocation fails; in that case
+ * everything allocated so far is released again, so callers only have to
+ * test the result.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{
+    struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+
+    if (!z) return 0;
+
+    /* create_s() is defined elsewhere; its result is checked in case it
+       reports failure by returning a null pointer. */
+    z->p = create_s();
+    if (!z->p) goto error;
+
+    if (S_size)
+    {
+        int i;
+
+        z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (!z->S) goto error;
+        for (i = 0; i < S_size; i++)
+        {
+            z->S[i] = create_s();
+            if (!z->S[i]) goto error;
+            /* S_size counts only the strings that really exist, so the
+               error path releases exactly those. */
+            z->S_size = i + 1;
+        }
+    }
+
+    if (I_size)
+    {
+        z->I = (int *) calloc(I_size, sizeof(int));
+        if (!z->I) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {
+        z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (!z->B) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    {   int i;
+        for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
+    }
+    /* Pointers that were never allocated are still null from calloc(). */
+    free(z->S);
+    free(z->I);
+    free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+    return 0;
+}
+
+/*
+ * Release an environment obtained from SN_create_env(): every string
+ * variable, the S/I/B arrays, the current string and the structure itself.
+ * Passing a null pointer is allowed and does nothing, like free().
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    if (!z) return;
+    if (z->S_size)
+    {
+        {   int i;
+            for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
+        }
+        free(z->S);
+    }
+    if (z->I_size) free(z->I);
+    if (z->B_size) free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Load a new word into the environment: the range from 0 up to the
+ * current limit z->l is replaced by the size symbols at s, and the cursor
+ * is moved back to the start.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    int old_limit = z->l;
+
+    replace_s(z, 0, old_limit, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/*
+ * Working state of one stemmer instance.
+ *   p        - the string being stemmed (created by create_s(), loaded by
+ *              SN_set_current())
+ *   c        - cursor position
+ *   l        - forward limit (end of the string)
+ *   lb       - backward limit used while scanning backwards
+ *   bra, ket - start and end of the currently marked slice
+ *   a        - NOTE(review): not referenced by the code in this part of
+ *              the patch; purpose to be confirmed
+ *   S, I, B  - string, integer and boolean variable arrays with S_size,
+ *              I_size and B_size elements, allocated by SN_create_env()
+ */
+struct SN_env {
+    symbol * p;
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;
+    symbol * * S;
+    int * I;
+    symbol * B;
+};
+
+/* Allocate an environment with the given numbers of S/I/B variables. */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+/* Free an environment and everything it owns. */
+extern void SN_close_env(struct SN_env * z);
+
+/* Replace the current string with the size symbols at s and reset the cursor. */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/*
+ * Prelude (Snowball source lines 24-26).
+ * Clears the Y_found flag kept in B[0], then rewrites 'y' to 'Y' in two
+ * passes: first for a 'y' matched at the starting cursor that is followed
+ * by a successful in_grouping(g_v) test, then repeatedly for every 'y'
+ * matched right after a successful in_grouping(g_v) test.  B[0] is set to
+ * 1 whenever a replacement is made.  The cursor is restored after each
+ * pass and the function always returns 1.
+ * NOTE(review): g_v is presumably the vowel grouping - confirm against the
+ * Snowball source.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Compute the region marks p1 (I[0]) and p2 (I[1]).
+ * Both start out as z->l.  If the table a_0 (the single entry "gener")
+ * matches at the cursor, p1 is taken right after it; otherwise the cursor
+ * is advanced past the first position where in_grouping(g_v) succeeds and
+ * then past the first position where out_grouping(g_v) succeeds before p1
+ * is taken.  One more such in/out pair gives p2.  Reaching z->l during any
+ * scan stops the computation and leaves the remaining marks at z->l.
+ * The cursor is restored and the function always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Backward test used by Step_1b and Step_5.
+ * Returns 1 when, scanning backwards from the cursor, either
+ *   out_grouping_b(g_v_WXY), in_grouping_b(g_v), out_grouping_b(g_v)
+ * succeed in that order, or (after resetting the cursor)
+ *   out_grouping_b(g_v), in_grouping_b(g_v)
+ * succeed and the cursor has then reached the backward limit lb.
+ * Returns 0 otherwise.  The cursor is left where the scan stopped.
+ */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* Region test: 1 when the cursor is at or beyond mark p1 (I[0]), else 0. */
+static int r_R1(struct SN_env * z) {
+    return (z->I[0] <= z->c) ? 1 : 0;
+}
+
+/* Region test: 1 when the cursor is at or beyond mark p2 (I[1]), else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/*
+ * Step 1a: handle the suffixes listed in a_1 (ied, s, ies, sses, ss, us),
+ * matched backwards from the cursor.
+ *   result 1 (sses)     - replace the suffix with "ss"
+ *   result 2 (ied, ies) - replace with "ie" when exactly one character
+ *                         precedes the suffix, otherwise with "i"
+ *   result 3 (s)        - step back one character, then scan backwards
+ *                         until in_grouping_b(g_v) succeeds; delete the
+ *                         suffix if it does, fail if lb is reached first
+ *   result -1 (ss, us)  - leave the word unchanged
+ * Returns 0 when no suffix matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1b: handle the suffixes listed in a_3 (ed, eed, ing, edly, eedly,
+ * ingly), matched backwards from the cursor.
+ *   result 1 (eed, eedly) - if the suffix lies in R1, replace it with "ee"
+ *   result 2 (the others) - require a position before the suffix where
+ *       in_grouping_b(g_v) succeeds, delete the suffix, then look at the
+ *       new ending through table a_2:
+ *         1 (at, bl, iz)                      - append "e"
+ *         2 (bb dd ff gg mm nn pp rr tt)      - delete the last character
+ *         3 (empty match)                     - if the cursor is exactly at
+ *                                               p1 and r_shortv() succeeds,
+ *                                               append "e"
+ * Returns 0 when no suffix matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1c: replace a final 'y' or 'Y' with 'i'.
+ * The replacement is made only when out_grouping_b(g_v) succeeds for the
+ * character before it and the cursor is still above the backward limit lb
+ * afterwards (i.e. that character is not the first one of the word).
+ * Returns 1 when the replacement was made, 0 otherwise.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/*
+ * Step 2: find the longest suffix from table a_4 ending at the cursor,
+ * require it to lie in R1, and rewrite it according to the table's result
+ * code (for example result 1 -> "tion", 6 -> "ize", 7 -> "ate",
+ * 12 -> "ble").  Two results carry an extra condition:
+ *   13 (ogi) - replaced with "og" only when preceded by 'l'
+ *   16 (li)  - deleted only when in_grouping_b(g_valid_LI) succeeds for
+ *              the preceding character
+ * Returns 0 when no suffix matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 3: find the longest suffix from table a_5 ending at the cursor,
+ * require it to lie in R1, and act on the result code:
+ *   1 -> "tion", 2 -> "ate", 3 -> "al", 4 -> "ic",
+ *   5 (ful, ness) -> delete,
+ *   6 (ative)     -> delete, but only when the suffix also lies in R2.
+ * Returns 0 when no suffix matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 4: find the longest suffix from table a_6 ending at the cursor,
+ * require it to lie in R2, and delete it.  For result 2 ("ion") the
+ * deletion happens only when the suffix is preceded by 's' or 't'.
+ * Returns 0 when no suffix matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 5: handle a final 'e' or 'l' (table a_7).
+ *   result 1 ('e') - delete it when it lies in R2, or when it lies in R1
+ *                    and r_shortv() fails for the text before it
+ *   result 2 ('l') - delete it when it lies in R2 and is preceded by 'l'
+ * Returns 0 when nothing matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Second exception list (table a_8: succeed, proceed, exceed, canning,
+ * inning, earring, herring, outing), matched backwards from the cursor.
+ * Returns 1 only when an entry matches and the match reaches the backward
+ * limit lb; otherwise returns 0.  ket/bra are set around the match.
+ */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (find_among_b(z, a_8, 8) == 0) /* substring, line 147 */
+        return 0;
+    z->bra = z->c; /* ], line 147 */
+    /* atlimit, line 147 */
+    return (z->c > z->lb) ? 0 : 1;
+}
+
+/*
+ * First exception list (table a_9), matched forwards from the cursor; the
+ * match must end exactly at z->l.
+ * Results 1-11 replace the matched word with a fixed stem:
+ *   skis->ski, skies->sky, dying->die, lying->lie, tying->tie, idly->idl,
+ *   gently->gentl, ugly->ugli, early->earli, only->onli, singly->singl.
+ * Entries with result -1 (andes, atlas, bias, cosmos, howe, news, sky)
+ * are accepted unchanged.
+ * Returns 1 when an entry matched, 0 otherwise.
+ */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Postlude: undo the marking done by the prelude.
+ * Returns 0 immediately when the Y_found flag (B[0]) is clear.  Otherwise
+ * scans forward and replaces every 'Y' with 'y', restores the cursor when
+ * the end of the string is reached, and returns 1.
+ */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+/*
+ * Entry point of the English stemmer; stems the string held in z in place.
+ *
+ * Order of processing:
+ *   1. r_exception1() - if the word is on the first exception list, done.
+ *   2. Otherwise the word must allow a hop of 3 positions from the cursor;
+ *      if it is shorter the function returns 0 without changing it.
+ *   3. r_prelude() and r_mark_regions(), each with the cursor restored.
+ *   4. Switch to backward mode (lb = cursor, cursor = l) and run Step_1a;
+ *      then, unless r_exception2() matches, Steps 1b, 1c, 2, 3, 4 and 5.
+ *      A failing step is simply skipped (the cursor is restored).
+ *   5. Back in forward mode (cursor = lb), run r_postlude().
+ * Returns 1 in every case except the short-word exit in step 2.
+ */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/*
+ * Create an environment sized for the English stemmer: no string
+ * variables, two integers (the marks p1 and p2) and one boolean (Y_found).
+ */
+extern struct SN_env * english_create_env(void)
+{
+    return SN_create_env(0, 2, 1);
+}
+
+/* Release an environment obtained from english_create_env(). */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+/* Integer limits under the names MAXINT / MININT. */
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Number of bytes reserved in front of every symbol buffer: two ints, the
+   capacity at index -2 and the current size at index -1.  The expansion is
+   parenthesised so that the macro keeps its value inside larger expressions
+   (for example x / HEAD or x - HEAD). */
+#define HEAD (2*sizeof(int))
+
+/* Accessors for the hidden header; p points at the first symbol.  SIZE and
+   CAPACITY stay usable as assignment targets. */
+#define SIZE(p)        (((int *)(p))[-1])
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    (((int *)(p))[-2])
+
+/* One entry of a table searched by find_among() / find_among_b(). */
+struct among
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    /* optional extra test: when non-null, the entry only counts as a match
+       if this call returns non-zero */
+    int (* function)(struct SN_env *);
+};
+
+/* Allocate / release a symbol buffer.  The buffer is preceded by a hidden
+   header of HEAD bytes read through SIZE() and CAPACITY(). */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+/* Test the symbol at the cursor against the bitmap s, which covers the codes
+   min..max.  in_* succeed when the symbol is in the set, out_* when it is
+   not.  On success the cursor moves one symbol (forwards, or backwards for
+   the _b forms) and 1 is returned; otherwise 0 is returned and the cursor
+   is left alone. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+/* As above, but the set is simply the code range min..max. */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+/* Compare the text at the cursor with a literal (eq_s, s_size symbols at s)
+   or with a symbol buffer (eq_v, length taken from SIZE(p)).  On a match the
+   cursor steps over the text and 1 is returned, otherwise 0. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+/* Look up the text at the cursor in the table v of v_size entries; return
+   the result field of the matching entry, or 0 when nothing matches. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+/* Buffer editing.  replace_s overwrites z->p[c_bra..c_ket) with s and
+   returns the change in length; the slice_* functions apply it to the
+   current slice z->bra..z->ket. */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+/* Replace z->p[bra..ket) and shift z->bra / z->ket by the length change. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+/* Copy the current slice (slice_to) or the text up to z->l (assign_to) into
+   p, growing p if needed; the possibly relocated buffer is returned. */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+/* Print the buffer with markers for lb, bra, c, ket and l. */
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int russian_stem(struct SN_env * z);
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Endings looked up (backwards) by r_perfective_gerund().  Each a_0 entry is
+   { s_size, s, substring_i, result, function }; result 1 and result 2
+   select the two cases of that routine.
+   NOTE(review): the symbol values look like KOI8-R byte codes - confirm. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Endings looked up (backwards) by r_adjective().  Every entry carries
+   result 1 and has no shorter entry chained through substring_i. */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Endings looked up (backwards) by r_adjectival() after an adjective ending
+   has been removed.  Result 1 and result 2 select the two cases of that
+   routine. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* The two endings looked up (backwards) by r_reflexive(). */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Endings looked up (backwards) by r_verb().  Result 1 and result 2 select
+   the two cases of that routine; substring_i links an entry to the shorter
+   entry that is its own suffix. */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Endings looked up (backwards) by r_noun().  Every entry carries
+   result 1; substring_i links an entry to the shorter entry that is its
+   own suffix. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* The two endings looked up (backwards) by r_derivational(). */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Endings looked up (backwards) by r_tidy_up(); results 1, 2 and 3 select
+   the three cases of that routine. */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Bitmap handed to in_grouping() / out_grouping() together with the code
+   range 192..220: bit (ch - 192) is set for each symbol in the group.
+   NOTE(review): the name suggests this is the vowel set - confirm. */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* Single-symbol literals compared with eq_s_b() in the routines below. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the two region marks I[0] (pV) and I[1] (p2).
+   Both start at z->l.  Scanning forwards from the cursor, I[0] becomes the
+   position just after the first symbol that is in g_v.  The scan then goes
+   past a symbol outside g_v, past another one inside it and past another
+   one outside it, and I[1] becomes that position.  If the end of the text
+   is reached first, the marks not yet assigned keep the value z->l.
+   The cursor is restored before returning; the result is always 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 100 */
+        while(1) { /* gopast, line 101 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+            break;
+        lab1:
+            if (z->c >= z->l) goto lab0; /* ran out of text: give up */
+            z->c++;
+        }
+        z->I[0] = z->c; /* setmark pV, line 101 */
+        while(1) { /* gopast, line 101 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+            break;
+        lab2:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+            break;
+        lab3:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+            break;
+        lab4:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 102 */
+    lab0:
+        z->c = c; /* put the cursor back where it started */
+    }
+    return 1;
+}
+
+/* Region test: yields 1 when the cursor is at or beyond the p2 mark held in
+   I[1], and 0 otherwise. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/* Remove an ending listed in a_0 from just before the cursor.
+   Result 1 endings are deleted only when the symbol in front of them is
+   s_0 (193) or s_1 (209); that symbol itself is kept, because bra is fixed
+   before the test.  Result 2 endings are deleted unconditionally.
+   Returns 1 when an ending was removed and 0 otherwise. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 111 */
+    among_var = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 111 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 115 */
+                if (!(eq_s_b(z, 1, s_0))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m; /* first alternative failed: rewind */
+                if (!(eq_s_b(z, 1, s_1))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 115 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 122 */
+            break;
+    }
+    return 1;
+}
+
+/* Delete an ending listed in a_1 from just before the cursor.
+   Returns 0 when no ending matches and 1 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int hit;
+    z->ket = z->c; /* [, line 127 */
+    hit = find_among_b(z, a_1, 26); /* substring, line 127 */
+    if (hit == 0) return 0;
+    z->bra = z->c; /* ], line 127 */
+    if (hit == 1) slice_del(z); /* delete, line 136 */
+    return 1;
+}
+
+/* Remove an adjective ending (r_adjective) and then, optionally, one of the
+   endings listed in a_2 that precedes it.
+   For a_2 result 1 the ending is deleted only when the symbol in front of
+   it is s_2 (193) or s_3 (209); for result 2 it is deleted unconditionally.
+   When the optional part does not apply the cursor is put back to where it
+   was after r_adjective.  Returns 0 only if r_adjective fails. */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m; goto lab0; }
+            case 1:
+                {   int m = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m;
+                    /* note: this m is the inner one, so the cursor is left
+                       at the start of the a_2 match when the test fails */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Delete an ending listed in a_3 from just before the cursor.
+   Returns 0 when no ending matches and 1 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int hit;
+    z->ket = z->c; /* [, line 168 */
+    hit = find_among_b(z, a_3, 2); /* substring, line 168 */
+    if (hit == 0) return 0;
+    z->bra = z->c; /* ], line 168 */
+    if (hit == 1) slice_del(z); /* delete, line 171 */
+    return 1;
+}
+
+/* Remove an ending listed in a_4 from just before the cursor.
+   Result 1 endings are deleted only when the symbol in front of them is
+   s_4 (193) or s_5 (209); that symbol itself is kept.  Result 2 endings are
+   deleted unconditionally.
+   Returns 1 when an ending was removed and 0 otherwise. */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 176 */
+    among_var = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 176 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 182 */
+                if (!(eq_s_b(z, 1, s_4))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m; /* first alternative failed: rewind */
+                if (!(eq_s_b(z, 1, s_5))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 182 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 190 */
+            break;
+    }
+    return 1;
+}
+
+/* Delete an ending listed in a_5 from just before the cursor.
+   Returns 0 when no ending matches and 1 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int hit;
+    z->ket = z->c; /* [, line 199 */
+    hit = find_among_b(z, a_5, 36); /* substring, line 199 */
+    if (hit == 0) return 0;
+    z->bra = z->c; /* ], line 199 */
+    if (hit == 1) slice_del(z); /* delete, line 206 */
+    return 1;
+}
+
+/* Delete an ending listed in a_6 from just before the cursor, provided the
+   start of the ending passes the r_R2 test.
+   Returns 0 when nothing matches or the test fails, and 1 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int hit;
+    z->ket = z->c; /* [, line 215 */
+    hit = find_among_b(z, a_6, 2); /* substring, line 215 */
+    if (hit == 0) return 0;
+    z->bra = z->c; /* ], line 215 */
+    if (r_R2(z) == 0) return 0; /* call R2, line 215 */
+    if (hit == 1) slice_del(z); /* delete, line 218 */
+    return 1;
+}
+
+/* Final clean-up driven by the a_7 table (searched backwards):
+     result 1 - delete the matched ending; then, if the text now ends in two
+                symbols 206, delete the last of them (returns 0 if it does
+                not, with the ending already removed);
+     result 2 - the match is a single 206; delete it only when another 206
+                precedes it, otherwise return 0;
+     result 3 - delete the matched symbol (216).
+   Returns 0 when nothing in a_7 matches. */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 223 */
+    among_var = find_among_b(z, a_7, 4); /* substring, line 223 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 223 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 227 */
+            z->ket = z->c; /* [, line 228 */
+            if (!(eq_s_b(z, 1, s_6))) return 0;
+            z->bra = z->c; /* ], line 228 */
+            if (!(eq_s_b(z, 1, s_7))) return 0;
+            slice_del(z); /* delete, line 228 */
+            break;
+        case 2:
+            /* bra/ket still bracket the matched 206; this only checks the
+               symbol in front of it */
+            if (!(eq_s_b(z, 1, s_8))) return 0;
+            slice_del(z); /* delete, line 231 */
+            break;
+        case 3:
+            slice_del(z); /* delete, line 233 */
+            break;
+    }
+    return 1;
+}
+
+/* Entry point of the Russian stemmer; works in place on the text in z->p.
+   Steps, as visible below:
+     1. r_mark_regions computes the marks I[0] and I[1].
+     2. Processing switches to backward mode (cursor at z->l) with the
+        backward limit raised to I[0]; returns 0 if the cursor is already
+        in front of I[0].
+     3. Either a perfective gerund ending is removed, or an optional
+        reflexive ending followed by one of adjectival / verb / noun.
+     4. A trailing symbol s_9 (201) is removed if present.
+     5. r_derivational and r_tidy_up are applied; their failure is ignored.
+     6. The original backward limit is restored and the cursor is set to it.
+   Returns 1 unless step 2 bails out. */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        m3 = z->lb; z->lb = z->c; /* m3 remembers the old limit */
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            z->c = z->l - m; /* back to the end of the text, whatever happened */
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3;
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Build a new stemmer environment for the Russian stemmer by delegating to
+   SN_create_env(0, 2, 0). */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Dispose of an environment by handing it to SN_close_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+#define unless(C) if(!(C))
+
+#define CREATE_SIZE 1
+
+/* Allocate a symbol buffer with room for CREATE_SIZE symbols.
+   The returned pointer addresses the first symbol; HEAD bytes in front of
+   it hold the capacity and the size, both initialised to CREATE_SIZE.
+   Returns a null pointer when memory cannot be obtained (previously the
+   header was written through an invalid pointer in that case).
+   Release the buffer with lose_s(). */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == 0) return 0;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Free a symbol buffer, including the HEAD bytes stored in front of p.
+   A null p is ignored, mirroring free(): without the guard the address
+   (char *) 0 - HEAD would be passed to free(). */
+extern void lose_s(symbol * p)
+{   if (p == 0) return;
+    free((char *) p - HEAD);
+}
+
+/* Forward test: succeed when the symbol at the cursor lies in min..max and
+   its bit is set in the bitmap s.  On success the cursor advances by one
+   and 1 is returned; otherwise 0 is returned and the cursor stays put. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: succeed when the symbol just before the cursor lies in
+   min..max and its bit is set in the bitmap s.  On success the cursor
+   retreats by one and 1 is returned; otherwise 0 is returned. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: succeed when the symbol at the cursor is NOT in the group,
+   i.e. it is outside min..max or its bit in the bitmap s is clear.  On
+   success the cursor advances by one and 1 is returned; otherwise 0. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max)
+    {   ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward test: succeed when the symbol just before the cursor is NOT in
+   the group, i.e. it is outside min..max or its bit in the bitmap s is
+   clear.  On success the cursor retreats by one and 1 is returned. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max)
+    {   ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward test: succeed when the symbol at the cursor lies in min..max.
+   On success the cursor advances by one and 1 is returned; otherwise 0. */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (!(min <= ch && ch <= max)) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: succeed when the symbol just before the cursor lies in
+   min..max.  On success the cursor retreats by one and 1 is returned. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (!(min <= ch && ch <= max)) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: succeed when the symbol at the cursor lies outside
+   min..max.  On success the cursor advances by one and 1 is returned. */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (min <= ch && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: succeed when the symbol just before the cursor lies
+   outside min..max.  On success the cursor retreats by one and 1 is
+   returned. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (min <= ch && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward literal match: if the s_size symbols starting at the cursor equal
+   s, move the cursor past them and return 1; otherwise return 0. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->l - z->c < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward literal match: if the s_size symbols ending at the cursor equal
+   s, move the cursor to their start and return 1; otherwise return 0. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->c - z->lb < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward match against a symbol buffer; its length comes from SIZE(p). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s(z, len, p);
+}
+
+/* Backward match against a symbol buffer; its length comes from SIZE(p). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s_b(z, len, p);
+}
+
+/* Look up the text starting at the cursor in the table v (v_size entries).
+
+   Phase 1 is a binary search over v: i and j bracket the candidate range,
+   and common_i / common_j record how many symbols of the text are already
+   known to agree with v[i] and v[j], so each comparison resumes from the
+   smaller of the two.  Running into the end of the text (z->l) counts as
+   "text sorts lower".
+
+   Phase 2 starts at v[i] and follows the substring_i links.  The first
+   entry whose whole string matched (common_i >= s_size) wins: the cursor is
+   placed just past it and its result is returned, provided its function
+   member is null or returns non-zero.
+
+   Returns 0 when the chain ends (substring_i < 0) without a match. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c + w->s_size; /* the test may have moved the cursor */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/* Backward counterpart of find_among(): looks up the text ending at the
+   cursor.  Strings are compared from their last symbol towards their
+   first, the backward limit z->lb plays the role of the end of text, and on
+   a match the cursor is placed at the start of the matched string.
+   Returns the result of the matching entry, or 0 when nothing matches. */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            /* one extra round so that v[0] itself gets compared */
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c - w->s_size; /* the test may have moved the cursor */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Move the contents of p into a new buffer with capacity n + 20 and free p.
+   CAPACITY(p) symbols are copied, so n must not be smaller than
+   CAPACITY(p) - 20; the callers in this file only ask for growth.
+   The size word of p is carried over so the new header is fully
+   initialised (it used to be left indeterminate).
+   Returns the new buffer.  If memory cannot be obtained, p is released and
+   a null pointer is returned instead of writing through an invalid pointer.
+   NOTE(review): replace_s, slice_to and assign_to use the result without
+   testing it. */
+extern symbol * increase_size(symbol * p, int n)
+{   symbol * q;
+    int new_size = n + 20;
+    void * mem = malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    if (mem == 0)
+    {   lose_s(p);
+        return 0;
+    }
+    q = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(q) = new_size;
+    SET_SIZE(q, SIZE(p));
+    memmove(q, p, CAPACITY(p) * sizeof(symbol)); lose_s(p); return q;
+}
+
+/* to replace symbols between c_bra and c_ket in z->p by the
+   s_size symbols at s
+*/
+
+/* Replace z->p[c_bra..c_ket) with the s_size symbols at s.
+   When the length changes, the buffer is grown if necessary, the tail after
+   c_ket is shifted, and SIZE(z->p) and z->l are adjusted.  The cursor z->c
+   is shifted along with the tail if it was at or after c_ket, and pulled
+   back to c_bra if it was inside the replaced span.
+   Returns the change in length (new length minus old length).
+   NOTE(review): the pointer returned by increase_size is stored without
+   being checked. */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{   int adjustment = s_size - (c_ket - c_bra);
+    int len = SIZE(z->p);
+    if (adjustment != 0)
+    {   if (adjustment + len > CAPACITY(z->p)) z->p = increase_size(z->p, adjustment + len);
+        /* slide everything after the replaced span to its new position */
+        memmove(z->p + c_ket + adjustment, z->p + c_ket, (len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, adjustment + len);
+        z->l += adjustment;
+        if (z->c >= c_ket) z->c += adjustment; else
+            if (z->c > c_bra) z->c = c_bra;
+    }
+    unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return adjustment;
+}
+
+/* Sanity check before a slice operation: requires
+   0 <= bra <= ket <= l <= SIZE(p).  When the check fails, a message goes to
+   stderr, the buffer state is dumped with debug() and the process exits
+   with status 1. */
+static void slice_check(struct SN_env * z)
+{
+    int ok = z->bra >= 0 &&
+             z->bra <= z->ket &&
+             z->ket <= z->l &&
+             z->l <= SIZE(z->p);   /* this last test could be removed */
+    if (ok) return;
+
+    fprintf(stderr, "faulty slice operation:\n");
+    debug(z, -1, 0);
+    exit(1);
+}
+
+/* Replace the current slice z->bra..z->ket with the s_size symbols at s,
+   after validating the slice with slice_check(). */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice with the contents of the symbol buffer p; the
+   length is taken from SIZE(p). */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, len, p);
+}
+
+/* Delete the current slice, i.e. replace it with zero symbols. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, 0, 0);
+}
+
+/* Replace z->p[bra..ket) with the s_size symbols at s, then shift z->bra
+   and z->ket by the length change when they lie at or after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* Same as insert_s(), with the text and its length taken from the symbol
+   buffer p. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice z->bra..z->ket into the symbol buffer p, growing p
+   first when its capacity is too small, and set SIZE(p) to the slice
+   length.  Returns p, which may have been relocated. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int len;
+    slice_check(z);
+    len = z->ket - z->bra;
+    if (len > CAPACITY(p)) p = increase_size(p, len);
+    memmove(p, z->p + z->bra, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into the symbol buffer p, growing p
+   first when its capacity is too small, and set SIZE(p) accordingly.
+   Returns p, which may have been relocated. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int count = z->l;
+    if (count > CAPACITY(p)) p = increase_size(p, count);
+    memmove(p, z->p, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/* Dump the buffer z->p to stdout for diagnostics.
+   When number >= 0 a prefix with number, line_count and the buffer size is
+   printed first.  The SIZE(z->p) symbols follow, with markers inserted at
+   the relevant positions: '{' for lb, '[' for bra, '|' for the cursor,
+   ']' for ket and '}' for l.  Zero symbols are shown as '#'.
+   The closing quote and newline are printed even when the prefix (and with
+   it the opening quote) was skipped. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{   int i;
+    int limit = SIZE(z->p);
+    /* earlier form of the prefix, without the size: */
+    /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+    /* i runs up to and including limit so that markers sitting at the very
+       end of the buffer are still printed */
+    for (i = 0; i <= limit; i++)
+    {   if (z->lb == i) printf("{");
+        if (z->bra == i) printf("[");
+        if (z->c == i) printf("|");
+        if (z->ket == i) printf("]");
+        if (z->l == i) printf("}");
+        if (i < limit)
+        {   int ch = z->p[i];
+            if (ch == 0) ch = '#';
+            printf("%c", ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Convert a NUL-terminated string to lower case in place, one byte at a
+ * time through tolower(), and return the pointer that was passed in.
+ */
+char*
+lowerstr(char *str) {
+   unsigned char *cur;
+
+   for(cur=(unsigned char*)str; *cur!='\0'; cur++)
+       *cur = tolower(*cur);
+   return str;
+}
+
+/*
+ * Release a stop list: free every stored word, then the pointer array,
+ * and finally zero the whole StopList structure (all fields, wordop
+ * included).
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       /*
+        * Test the counter before dereferencing: readstoplist() does not
+        * NULL-terminate the array, so *ptr must not be read once
+        * s->len words have been consumed.
+        */
+       while( s->len >0 && *ptr ) {
+           free(*ptr);
+           ptr++; s->len--;
+       }
+       /* the array itself is freed exactly once, after all its entries */
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/* Free a partially built stop-word array: its first len strings, then the array. */
+static void
+dropstopwords(char **stop, int len) {
+   while( len>0 )
+       free(stop[--len]);
+   if ( stop )
+       free(stop);
+}
+
+/*
+ * Load a stop-word list from the file whose name is given by `in`, one
+ * word per line.  Empty lines are skipped.  Each word is strdup()ed and,
+ * when s->wordop is set, passed through it before being stored.
+ *
+ * On return s->stop points to the malloc'ed array (NULL when `in` is NULL
+ * or empty) and s->len holds the number of words.  The array is neither
+ * sorted nor NULL-terminated.
+ *
+ * Raises elog(ERROR) when the file cannot be opened or memory runs out.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           int blen=strlen(buf);
+
+           /*
+            * Strip the line terminator only when fgets() stored one; the
+            * last line of a file may lack it, and chopping blindly would
+            * eat the final letter of that word.
+            */
+           if ( blen>0 && buf[blen-1]=='\n' )
+               buf[blen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /*
+                    * The words read so far live in the local array, not in
+                    * s->stop, so release them here; freestoplist() is then
+                    * only left to zero the structure.
+                    */
+                   dropstopwords(stop, s->len);
+                   s->stop=NULL; s->len=0;
+                   freestoplist(s);
+                   fclose(hin);
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               /* same clean-up as above; slot s->len holds no string */
+               dropstopwords(stop, s->len);
+               s->stop=NULL; s->len=0;
+               freestoplist(s);
+               fclose(hin);
+               elog(ERROR,"Not enough memory");
+           }
+           if ( s->wordop )
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++;
+       }
+       fclose(hin);
+       pfree(filename);
+   }
+   s->stop=stop;
+}
+
+/*
+ * qsort()/bsearch() callback for arrays of char*: both arguments point
+ * to array elements, and the strings they hold are ordered by strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   char *left = *(char**)a;
+   char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/*
+ * Sort the words of a stop list with comparestr() so that
+ * searchstoplist() can bsearch() them.  Empty lists are left alone.
+ */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is present in the stop list.  If s->wordop is
+ * set, key is first passed through it.  The lookup is a bsearch() with
+ * comparestr().
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop )
+       key=(*(s->wordop))(key);
+
+   if ( !s->stop || s->len<=0 )
+       return false;
+
+   if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
+       return true;
+   return false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the configuration whose pg_ts_cfg oid is `id`.
+ *
+ * Step 1 reads the parser name of the configuration.  Step 2 reads, for
+ * every token type of that parser, the array of dictionary names mapped to
+ * it and stores them in cfg->map[tokid].  Step 3 runs after SPI_finish()
+ * and translates the parser name and every dictionary name into an oid
+ * (name2id_prs / name2id_dict).
+ *
+ * cfg->map and the dict_id arrays are malloc'ed.  The name copies are made
+ * in TopMemoryContext so that they are still valid after SPI_finish();
+ * they are pfree'd in step 3.
+ *
+ * Errors are reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   /* step 1: parser name of this configuration */
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* step 2: token type -> dictionary names; $1 is now the parser name */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /*
+        * Rows arrive ordered by tokid descending, so the first row
+        * carries the largest token id and fixes the size of the map.
+        */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull refers to column 2 here: no dictionary list for this token */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* keep the name copies alive past SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       /* free the detoasted copy, if one was made */
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+
+   /* step 3: replace the stored names by oids */
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+typedef struct {
+   TSCfgInfo   *last_cfg;
+   int     len;
+   int     reallen;
+   TSCfgInfo   *list;
+   SNMap       name2id_map;
+} CFGList;
+
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name -> oid map, every cached
+ * TSCfgInfo with its per-token dictionary arrays, and the list itself.
+ * CList is left zeroed, so the next findcfg() reloads from the catalog.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               /* each map has CList.list[i].len slots, see init_cfg() */
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() callback ordering TSCfgInfo entries by their id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is an
+ * unsigned type, so the difference of two oids converted to int can have
+ * the wrong sign when the values are far apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached TSCfgInfo for configuration `id`, loading it with
+ * init_cfg() on first use.
+ *
+ * Lookup order: the most recently returned entry, then a bsearch() over
+ * the list (kept sorted by id), then a fresh load appended to the list.
+ * The returned pointer refers to an element of CList.list and can be
+ * invalidated by a later call that grows or re-sorts the list.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo *slot;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+
+   /*
+    * Build the entry in the first free slot, but do not publish it through
+    * CList.last_cfg yet: init_cfg() stores the id early and can still raise
+    * an error afterwards, and the shortcut test at the top of this function
+    * would then hand out the half-initialised slot on the next call.
+    */
+   slot=&(CList.list[CList.len]);
+   CList.last_cfg=NULL;
+   init_cfg(id, slot);
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return findcfg(id); /* qsort changed order!! */
+}
+
+
+/*
+ * Translate a configuration name (pg_ts_cfg.ts_name) into its oid.
+ *
+ * Names already seen are answered from CList.name2id_map; otherwise the
+ * catalog is queried through a saved SPI plan and the result is cached.
+ * Raises elog(ERROR) when the name is unknown or the oid comes back NULL.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid cfgid;
+
+   /* fast path: answered from the cache */
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid )
+       return cfgid;
+
+   SPI_connect();
+   /* the plan is prepared once and reused for later calls */
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( !plan_name2id )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed <= 0 )
+       elog(ERROR, "No tsearch config");
+
+   cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull )
+       elog(ERROR, "Null id for tsearch config");
+
+   SPI_finish();
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Split buf (buflen bytes) into lexemes with the parser of cfg, normalise
+ * each one with the dictionaries mapped to its token type, and append the
+ * resulting words to prs->words.
+ *
+ * For every token the dictionaries are tried in order; the first one that
+ * returns a non-NULL array decides the result (an empty array means the
+ * token is dropped).  prs->pos is advanced once per recognised token and
+ * recorded, limited by LIMITPOS(), in every word produced for it.  The
+ * strings returned by the dictionary are stored in prs->words without
+ * copying; only the array that held them is freed.
+ *
+ * Raises elog(ERROR) when a lexeme is MAXSTRLEN bytes or longer.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an int, so it travels as an int32 datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token to prs->words, doubling the array while it is full.
+ * The new entry is zeroed, then given the token type, the length and a
+ * palloc'ed copy of the buflen bytes at buf (no terminator is added).
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* take the slot address only after any reallocation */
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;
+}
+
+/*
+ * Link the most recently added word (prs->words[curwords-1]) to every
+ * VAL item of the query whose operand equals the buflen bytes at buf.
+ *
+ * The first matching item is stored in the word itself.  For each further
+ * match a copy of the word is appended with repeated=1 and its own item;
+ * the copy shares the word's string pointer.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* make room for the worst case: one extra entry per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc() above may have moved the array,
+    * which would leave a pointer obtained earlier dangling.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Headline variant of parsetext_v2(): split buf (buflen bytes) with the
+ * parser of cfg and record every token in prs->words via hladdword().
+ * Tokens whose type has dictionaries mapped are normalised, and each
+ * normal form is matched against the query with hlfinditem().
+ *
+ * Unlike parsetext_v2() the normalised strings are not kept: each one is
+ * pfree'd right after matching, followed by the array that held them.
+ *
+ * Raises elog(ERROR) when a lexeme is MAXSTRLEN bytes or longer.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       /* every token is recorded, whether or not a dictionary handles it */
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an int, so it travels as an int32 datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Assemble the headline text from prs->words and return it as a freshly
+ * palloc'ed text value.
+ *
+ * Only words with `in` set and neither `skip` nor `repeated` set are
+ * emitted.  A word flagged `replace` becomes a single space; any other
+ * word is copied verbatim and, when `selected`, wrapped in
+ * prs->startsel / prs->stopsel.  The string of every non-repeated word is
+ * pfree'd on the way, so prs->words must not be used for output again.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int     cap=128;            /* bytes currently allocated for the result */
+   int     used=VARHDRSZ;      /* bytes filled so far, header included */
+   text    *out=(text*)palloc( cap );
+   int     i;
+
+   for(i=0; i<prs->curwords; i++) {
+       HLWORD  *w=prs->words + i;
+
+       /* double the buffer until the word plus both markers fit */
+       while ( w->len + prs->stopsellen + prs->startsellen + used >= cap ) {
+           cap*= 2;
+           out = (text *) repalloc(out, cap);
+       }
+
+       if ( w->in && !w->skip && !w->repeated ) {
+           char *dst=((char*)out) + used;
+
+           if ( w->replace ) {
+               *dst=' ';
+               used++;
+           } else {
+               if (w->selected) {
+                   memcpy(dst,prs->startsel,prs->startsellen);
+                   dst+=prs->startsellen;
+               }
+               memcpy(dst,w->word,w->len);
+               dst+=w->len;
+               if (w->selected) {
+                   memcpy(dst,prs->stopsel,prs->stopsellen);
+                   dst+=prs->stopsellen;
+               }
+               used=dst - ((char*)out);
+           }
+       }
+
+       /* repeated entries share the string of the word they were copied from */
+       if ( !w->repeated )
+           pfree(w->word);
+   }
+
+   VARATT_SIZEP(out)=used;
+   return out; 
+}
+
+/*
+ * Return the oid of the current configuration.
+ *
+ * If none has been chosen yet (current_cfg_id is 0), the configuration
+ * whose pg_ts_cfg.locale equals the process LC_CTYPE locale is looked up
+ * through a saved SPI plan and remembered in current_cfg_id.
+ * Raises elog(ERROR) when no configuration matches the locale.
+ */
+int  
+get_currcfg(void) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1];
+   const char *curlocale;
+   bool isnull;
+   int rc;
+
+   /* already known: nothing to look up */
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* a NULL second argument only queries the current locale name */
+   curlocale = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)curlocale) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed <= 0 )
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current one.
+ * findcfg() is called first, so the configuration is loaded (and any load
+ * error raised) before current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current
+ * one.  The name is resolved with name2id_cfg() and the result handed to
+ * set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Oid cfgid=name2id_cfg(name);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum( cfgid ) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* SQL-callable: return the oid of the current configuration, see get_currcfg(). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=get_currcfg();
+
+   PG_RETURN_OID( cfgid );
+}
+
+/*
+ * SQL-callable: reports "TSearch cache cleaned" at NOTICE level.
+ * NOTE(review): nothing here clears a cache directly; presumably
+ * ts_error() resets the caches as a side effect -- confirm in common.c.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries mapped to one token type, in the order they are tried. */
+typedef struct {
+   /* number of entries in dict_id */
+   int len;
+   /* malloc'ed array; holds dictionary oids as Datums once init_cfg() has finished */
+   Datum   *dict_id;
+} ListDictionary;
+
+/* One cached configuration, as filled in by init_cfg(). */
+typedef struct {
+   /* oid of the configuration (the findcfg() lookup key) */
+   Oid id;
+   /* oid of the parser, as returned by name2id_prs() */
+   Oid prs_id;
+   /* number of slots in map (largest mapped token id + 1) */
+   int len;
+   /* malloc'ed array indexed by token type id */
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalised word produced by parsetext_v2(). */
+typedef struct {
+        /* length of word in bytes (strlen of the dictionary output) */
+        uint16          len;
+   /* parsetext_v2() stores a single position in pos.pos;
+      NOTE(review): apos is presumably a position array used by later stages -- confirm */
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        /* string returned by the dictionary; not copied by parsetext_v2() */
+        char       *word;
+   /* set to 0 by parsetext_v2(); NOTE(review): presumably the allocated size of apos -- confirm */
+   uint32  alen;
+}       WORD;
+   
+/* Growable array of WORDs filled by parsetext_v2(). */
+typedef struct {
+        /* array of lenwords entries; doubled with repalloc() when full */
+        WORD       *words;
+        /* number of allocated entries in words */
+        int4            lenwords;
+        /* number of entries in use */
+        int4            curwords;
+   /* running position counter, advanced once per token a dictionary recognised */
+   int4        pos;
+}       PRSTEXT;
+
+/* One token of the document being turned into a headline. */
+typedef struct {
+        /* length of word in bytes */
+        uint16    len;
+   /*
+    * Flags as read by genhl(): a word is emitted only when `in` is set and
+    * `skip` and `repeated` are clear; `replace` emits a single space in its
+    * place; `selected` wraps it in the start/stop markers.  `repeated`
+    * marks the extra copies hlfinditem() appends for additional matching
+    * query items; they share the original's word pointer.
+    */
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   /* token type reported by the parser */
+   uint8   type;
+        /* palloc'ed copy of the token text, not NUL-terminated (see hladdword) */
+        char      *word;
+   /* query item this token matched, if any (set by hlfinditem) */
+   ITEM      *item;
+}       HLWORD;
+   
+/* Growable array of HLWORDs plus the markers genhl() puts around selected words. */
+typedef struct {
+        /* array of lenwords entries; doubled with repalloc() when full */
+        HLWORD       *words;
+        /* number of allocated entries in words */
+        int4            lenwords;
+        /* number of entries in use */
+        int4            curwords;
+        /* marker copied before a selected word (startsellen bytes) */
+        char           *startsel;
+        /* marker copied after a selected word (stopsellen bytes) */
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+
+/*
+ * Input function of the tsstat type.  The textual argument is not
+ * examined: the result is always an empty statistics object consisting
+ * of the header only (no entries, no string data).
+ */
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {   /* output function of tsstat: not implemented, always raises an error */
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();   /* follows the ERROR-level elog above */
+}
+
+/*
+ * Grow (or create) an array of WordEntry pointers.
+ *
+ * in   - current array, or NULL if nothing has been allocated yet
+ * len  - in/out: capacity of the array in elements
+ *
+ * An existing non-empty array is doubled with repalloc(); otherwise a new
+ * array of 8 elements is allocated.  *len is updated to the new capacity
+ * and the (possibly moved) array is returned.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*)* (*len) );
+   }
+
+   *len=8;
+   return palloc( sizeof(WordEntry*)* (*len) );
+}
+
+/*
+ * Order a statistics entry against a tsvector lexeme.
+ *
+ * a/stat - statistics entry and the tsstat that owns its string data
+ * b/txt  - lexeme entry and the tsvector that owns its string data
+ *
+ * Entries are ordered by length first; entries of equal length are ordered
+ * by strncmp() over their bytes.  Returns <0, 0 or >0.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   char    *statword, *txtword;
+
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   statword = STATSTRPTR(stat) + a->pos;
+   txtword = STRPTR(txt) + b->pos;
+   return strncmp(statword, txtword, a->len);
+}
+
+/*
+ * Initialise statistics entry *dst for lexeme 'we' of tsvector 'txt', which
+ * is seen for the first time: one document, as many occurrences as the
+ * lexeme has positions (at least one).  The lexeme text is copied to curptr
+ * inside the string area of 'newstat'.  Returns the first free byte after
+ * the copied text.
+ */
+static char*
+fillStatEntry(StatEntry *dst, tsstat *newstat, tsvector *txt, WordEntry *we, char *curptr) {
+   dst->nentry=POSDATALEN(txt,we);
+   if ( dst->nentry==0 )
+       dst->nentry=1;
+   dst->ndoc=1;
+   dst->len=we->len;
+   memcpy(curptr, STRPTR(txt) + we->pos, dst->len);
+   dst->pos = curptr - STATSTRPTR(newstat);
+   return curptr + dst->len;
+}
+
+/*
+ * Build a new tsstat containing every entry of 'stat' plus 'len' new
+ * entries for the lexemes entry[0..len-1] of tsvector 'txt'.
+ *
+ * The entry array of the result is kept ordered as defined by
+ * compareStatWord().  The caller (ts_accum) passes only lexemes that are not
+ * yet present in 'stat', in the order they appear in 'txt'; the merge below
+ * relies on that.  String data of 'stat' is copied unchanged to the start of
+ * the new string area, so the pos offsets of old entries stay valid; new
+ * lexeme text is appended after it.
+ *
+ * 'stat' itself is not modified or freed.  Returns the palloc'ed result.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* bytes of lexeme text that will be appended */
+   while( ptr-entry < len ) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;   /* must be set before STATSTRPTR(newstat) is used */
+
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new lexeme: binary search for its slot, copy both halves around it */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       fillStatEntry(nptr, newstat, txt, *ptr, curptr);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new lexemes: merge the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry < len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               curptr = fillStatEntry(nptr, newstat, txt, *ptr, curptr);
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* remaining old entries (possibly none) */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+
+       /* remaining new lexemes; only reached when all old entries were consumed above */
+       while( ptr-entry < len ) {
+           curptr = fillStatEntry(nptr, newstat, txt, *ptr, curptr);
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+
+/*
+ * ts_accum(stat tsstat, txt tsvector) returns tsstat
+ *
+ * Accumulate the lexemes of one tsvector into a statistics object.
+ * For a lexeme already present in 'stat' the entry is updated in place:
+ * ndoc is incremented and nentry grows by the number of positions of the
+ * lexeme (1 if it carries no positions).  Lexemes not yet present are
+ * collected and added by formstat(), which returns a newly allocated object.
+ *
+ * Returns 'stat' itself when nothing new had to be added (including a NULL
+ * or empty tsvector), otherwise the new object.  The old object is not
+ * freed here; ts_stat_sql() frees it when the returned pointer differs.
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt;
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /*
+    * A NULL tsvector must be detected before detoasting: detoasting
+    * dereferences the datum.
+    */
+   if ( PG_ARGISNULL(1) || PG_GETARG_POINTER(1)==NULL )
+       PG_RETURN_POINTER(stat);
+
+   txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+
+   /* simple check of correctness */
+   if ( txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   if ( stat->size < 100*txt->size ) { /* merge */
+       /* both arrays are walked once in parallel */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* lexeme sorts before the current entry: it is new */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* everything left in the tsvector sorts after the last entry: all new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       /* statistics are much larger than the tsvector: binary search per lexeme */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;   /* leaves StopLow < StopHigh, i.e. "found" */
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Cross-call state of the set-returning functions in this file.
+ * It is created by ts_setup_firstcall() and consumed by ts_process_call().
+ */
+typedef struct {
+   uint32  cur;    /* index of the next statistics entry to return */
+   tsstat *stat;   /* private copy of the statistics being returned */
+} StatStorage;
+
+static void   /* ts_setup_firstcall: prepare the per-query state used by ts_process_call() */
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   StatStorage     *st;
+   
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);   /* everything allocated below goes into the multi-call context */
+   st=palloc( sizeof(StatStorage) );
+   st->cur=0;   /* start with the first entry */
+   st->stat=palloc( stat->len );
+   memcpy(st->stat, stat, stat->len);   /* private copy of the whole object (stat->len bytes) */
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("statinfo");   /* result rows have the shape of the "statinfo" type */
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);   /* used by BuildTupleFromCStrings() in ts_process_call() */
+   MemoryContextSwitchTo(oldcontext);
+}
+
+
+/*
+ * Produce the next result row of a statistics set-returning function.
+ *
+ * Reads the StatStorage kept in funcctx->user_fctx.  While entries remain,
+ * builds a (word, ndoc, nentry) tuple for the current entry, advances the
+ * cursor and returns the tuple as a Datum.  Once all entries have been
+ * returned, frees the stored statistics and the StatStorage itself and
+ * returns (Datum)0.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *st=(StatStorage*)funcctx->user_fctx;
+   StatEntry   *entry;
+   char    *values[3];
+   char    ndoc[16];
+   char    nentry[16];
+   char    *word;
+   HeapTuple   tuple;
+   Datum   result;
+
+   /* nothing left: release the state and signal the end of the set */
+   if ( !( st->cur < st->stat->size ) ) {
+       pfree(st->stat);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry=STATPTR(st->stat) + st->cur;
+
+   /* the counters are passed as text */
+   sprintf(ndoc,"%d",entry->ndoc);
+   sprintf(nentry,"%d",entry->nentry);
+
+   /* the word is not NUL-terminated in the string area: make a C string */
+   word=palloc( entry->len+1 );
+   memcpy( word, STATSTRPTR(st->stat)+entry->pos, entry->len);
+   word[entry->len]='\0';
+
+   values[0]=word;
+   values[1]=ndoc;
+   values[2]=nentry;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(word);
+   st->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {   /* set-returning function: returns the entries of the tsstat argument one row per call */
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );   /* copies argument 0 into the multi-call state */
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )   /* (Datum)0 means no entries are left */
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Oid of the tsvector type; InvalidOid until get_ti_Oid() has looked it up */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the Oid of the tsvector type in pg_type through SPI and store it
+ * in tiOid.  Must be called with an open SPI connection.  Raises an error
+ * if the query fails, returns no row, or yields InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is an unsigned row count, so "no row" is 0, never a
+    * negative value; without this check vals[0] below would be read from
+    * an empty result.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL query given in 'txt' and accumulate statistics over its
+ * result.
+ *
+ * The query must return exactly one column of type tsvector; otherwise an
+ * error is raised.  Rows are fetched through a cursor in batches of 100 and
+ * every non-NULL value is fed to ts_accum().  Must be called with an open
+ * SPI connection.
+ *
+ * Returns the accumulated tsstat (an empty one if the query returns no
+ * non-NULL values).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from an empty statistics object */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   /* an empty batch means the cursor is exhausted */
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum returns its input when nothing new was added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {   /* set-returning function: statistics over the tsvector column returned by the query text in argument 0 */
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();   /* NOTE(review): return value is not checked */
+       stat = ts_stat_sql(txt);   /* runs the query and accumulates all rows on the first call */
+       PG_FREE_IF_COPY(txt,0); 
+       ts_setup_firstcall(funcctx, stat );   /* copies stat into the multi-call context before SPI_finish() */
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )   /* (Datum)0 means no entries are left */
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+typedef struct {   /* statistics for one lexeme stored in a tsstat */
+   uint32  len;   /* length of the lexeme text in bytes */
+   uint32  pos;   /* offset of the lexeme text from STATSTRPTR() of the owning tsstat */
+   uint32  ndoc;   /* number of tsvectors in which the lexeme was seen */
+   uint32  nentry; /* total number of occurrences; a lexeme without positions counts as one */
+}  StatEntry;
+
+typedef struct {   /* accumulated lexeme statistics: header, then StatEntry array, then lexeme text */
+   int4        len;   /* total size of the object in bytes */
+   int4        size;   /* number of StatEntry elements */
+   char        data[1];   /* start of the variable part; accessed through STATPTR()/STATSTRPTR() */
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat: header (len, size), then 'size' StatEntry
+ * elements, then the lexeme text.  Macro arguments are parenthesized so
+ * that expressions can be passed safely.
+ */
+/* size of the fixed header (len and size fields) */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* total size of a tsstat with x entries and lenstr bytes of lexeme text */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* pointer to the first StatEntry of tsstat x */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* pointer to the lexeme text of tsstat x; depends on x->size */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* number of bytes of lexeme text in tsstat x */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of lexeme positions in the string and their lengths
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>     /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort comparator for WordEntryPos elements: ascending by position.
+ *
+ * Returns 0 for equal positions.  A comparator that reports "greater" for
+ * equal keys is inconsistent (cmp(a,b) and cmp(b,a) would both be 1), which
+ * qsort does not permit; uniquePos() needs equal positions to end up
+ * adjacent after sorting.
+ */
+static int 
+comparePos(const void *a, const void *b) {
+   int apos = ((const WordEntryPos *) a)->pos;
+   int bpos = ((const WordEntryPos *) b)->pos;
+
+   if ( apos == bpos )
+       return 0;
+   return ( apos > bpos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ *
+ * When the same position occurs more than once, the highest weight seen is
+ * kept.  Compaction stops early once MAXNUMPOS entries have been produced
+ * or the largest representable position (MAXENTRYPOS-1) has been stored.
+ * Returns the number of entries left at the front of a[].
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   int4 src, last = 0;
+
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (src = 1; src < l; src++) {
+       if ( a[src].pos == a[last].pos ) {
+           /* duplicate position: remember the strongest weight only */
+           if ( a[src].weight > a[last].weight )
+               a[last].weight = a[src].weight;
+           continue;
+       }
+
+       last++;
+       a[last].pos = a[src].pos;
+       a[last].weight = a[src].weight;
+       if ( last >= MAXNUMPOS-1 || a[last].pos == MAXENTRYPOS-1 )
+           break;
+   }
+
+   return last + 1;
+}
+
+/* Lexeme text that compareentry() indexes into; uniqueentry() sets it before sorting */
+static char *BufferStr;
+
+/*
+ * qsort comparator for WordEntryIN elements: shorter lexemes sort first,
+ * lexemes of equal length are ordered by strncmp over their bytes in
+ * BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntry *ea = &((const WordEntryIN *) a)->entry;
+   const WordEntry *eb = &((const WordEntryIN *) b)->entry;
+
+   if ( ea->len != eb->len )
+       return ( ea->len > eb->len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[ea->pos], &BufferStr[eb->pos], ea->len);
+}
+
+/*
+ * Sort the l parsed entries in a[] and merge entries with identical lexemes
+ * in place; position lists of merged entries are concatenated and then
+ * deduplicated with uniquePos().
+ *
+ * a         - entries; entry.pos/entry.len index into buf, and pos (when
+ *             entry.haspos is set) is a palloc'd array whose first slot holds
+ *             the number of positions as a uint16
+ * l         - number of entries
+ * buf       - buffer holding the lexeme text
+ * outbuflen - output: exact number of data bytes needed to store the
+ *             resulting entries in tsvector layout, i.e. for every entry
+ *             SHORTALIGN(len) plus, when it has positions, one slot for the
+ *             count and one per position
+ *
+ * Returns the number of distinct entries left at the front of a[].
+ *
+ * The size is computed from zero rather than added on top of whatever the
+ * caller passed in, and it includes the count slot of each position list;
+ * previously the result was only large enough thanks to slack inherited
+ * from the caller's scratch buffer size.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   *outbuflen = 0;
+   if (l <= 0)
+       return 0;
+
+   res = a;
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen += (*(uint16*)(a->pos) + 1) * sizeof(WordEntryPos);
+       }
+       *outbuflen += SHORTALIGN(a->entry.len);
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* new lexeme: finish accounting for the previous one */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           /* memcpy must not be given identical source and destination */
+           if ( res != ptr )
+               memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* same lexeme again: fold its positions into res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* res had no positions yet: adopt the duplicate's array */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* account for the last distinct entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/* States of the tokenizer state machine in gettoken_tsvector() */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Make sure the word buffer has room for at least one more byte at
+ * state->curpos: when the write cursor is within one byte of the end, the
+ * buffer size (state->len) is doubled and state->curpos is re-based onto the
+ * reallocated buffer.  Expects a variable named "state" in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Extract the next lexeme, and its optional position list, from the input
+ * string at state->prsbuf.
+ *
+ * On success the lexeme is left NUL-terminated in state->word with
+ * state->curpos pointing at the terminator, and state->prsbuf is advanced.
+ * If a ":pos[weight],..." list was parsed, state->alen is non-zero and
+ * state->pos is a palloc'd array whose first slot holds the number of
+ * positions (as uint16), followed by the positions themselves; otherwise
+ * state->alen is 0.  When state->oprisdelim is set, ISOPERATOR characters
+ * and ':' end a word and no position list is parsed.
+ *
+ * Returns 1 when a lexeme was read, 0 at end of input.  Malformed input is
+ * reported with elog(ERROR).
+ *
+ * The <ctype.h> classification calls receive the character cast to
+ * unsigned char: passing a negative plain char (any byte >= 0x80 where char
+ * is signed) is undefined behaviour.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           /* skipping blanks in front of a lexeme */
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           /* character following a backslash is taken literally */
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           /* inside an unquoted lexeme */
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           /* inside a single-quoted lexeme */
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           /* just after a closing quote: ':' starts a position list */
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           /* expecting the first digit of a position */
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           /* remaining digits of a position, its weight letter, or a delimiter */
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function for the tsvector type: parses the textual form
+ * (lexemes, optionally quoted, each optionally followed by
+ * ":pos[weight],...") into the binary representation.
+ *
+ * Lexemes are collected into a scratch buffer, sorted and merged with
+ * uniqueentry(), and then laid out as an array of WordEntry followed by the
+ * data area (lexeme text padded with SHORTALIGN, then the position list for
+ * entries that have one).
+ *
+ * Changes relative to the original:
+ *  - offsets are rejected when they reach MAXSTRPOS, because the 20-bit
+ *    WordEntry.pos field can hold at most MAXSTRPOS-1;
+ *  - the same check is applied to offsets in the final layout, where
+ *    interleaved position lists push lexemes further out than in the
+ *    scratch buffer;
+ *  - the data area size is computed here from the merged entries, so it is
+ *    exact, instead of inheriting the scratch buffer size.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array and the scratch text buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+
+   /* exact size of the data area for the merged entries */
+   buflen = 0;
+   for (i = 0; i < len; i++)
+   {
+       buflen += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos )
+           buflen += (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /* the offset must still fit WordEntry.pos in the final layout */
+       if (cur - STRPTR(in) >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* count slot plus positions go right after the padded lexeme */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * SQL-callable: number of entries stored in a tsvector (its size field).
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before the detoasted copy may be released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function for the tsvector type.
+ *
+ * Every entry is printed as a single-quoted lexeme (embedded quotes are
+ * preceded by a backslash), followed, when it has positions, by ':' and a
+ * comma-separated list of positions, each with an optional weight letter
+ * (A, B or C; weight 0 prints nothing).  Entries are separated by a space.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   WordEntry  *entry = ARRPTR(out);
+   char       *outbuf,
+              *dst,
+              *src;
+   int4        i,
+               remaining,
+               npos,
+               lenbuf;
+
+   /* upper bound for the text size: quotes, separators, terminator ... */
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       /* ... each lexeme with room for escapes, and 7 bytes per position */
+       lenbuf += entry[i].len*2 /*for escape */;
+       if ( entry[i].haspos )
+           lenbuf += 7*POSDATALEN(out, &(entry[i]));
+   }
+
+   dst = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++, entry++)
+   {
+       src = STRPTR(out)+entry->pos;
+       if (i != 0)
+           *dst++ = ' ';
+       *dst++ = '\'';
+       for (remaining = entry->len; remaining > 0; remaining--)
+       {
+           if (*src == '\'')
+           {
+               int4        used = dst - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               dst = outbuf + used;
+               *dst++ = '\\';
+           }
+           *dst++ = *src++;
+       }
+       *dst++ = '\'';
+
+       npos = POSDATALEN(out,entry);
+       if ( npos != 0 ) {
+           WordEntryPos *wptr = POSDATAPTR(out,entry);
+
+           *dst++ = ':';
+           for ( ; npos > 0; npos--, wptr++ ) {
+               sprintf(dst,"%d",wptr->pos);
+               dst += strlen(dst);
+               if ( wptr->weight == 3 )
+                   *dst++ = 'A';
+               else if ( wptr->weight == 2 )
+                   *dst++ = 'B';
+               else if ( wptr->weight == 1 )
+                   *dst++ = 'C';
+               if ( npos>1 )
+                   *dst++ = ',';
+           }
+       }
+   }
+   *dst='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort comparator for WORD elements: shorter words first, then by bytes
+ * (strncmp), then by ascending position so that occurrences of the same
+ * word are visited in position order by uniqueWORD().
+ *
+ * Returns 0 when word and position are both equal; the original returned -1
+ * in that case, which is an inconsistent ordering for qsort.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int res;
+
+   if (wa->len != wb->len)
+       return (wa->len > wb->len) ? 1 : -1;
+
+   res = strncmp(wa->word, wb->word, wb->len);
+   if ( res != 0 )
+       return res;
+
+   if ( wa->pos.pos == wb->pos.pos )
+       return 0;
+   return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l parsed words in a[] and merge repeated words in place.
+ *
+ * For every distinct word a position array is allocated in pos.apos:
+ * slot 0 holds the number of positions, the positions follow; alen is the
+ * allocated number of slots.  Text of merged duplicates is pfree'd.
+ * Returns the number of distinct words left at the front of a[].
+ *
+ * NOTE(review): pos.pos is copied into tmppos before pos.apos is assigned,
+ * which suggests the two members share storage (a union declared outside
+ * this file) -- confirm before reordering these statements.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   /* single word: no sorting needed, just build its one-element list */
+   if (l == 1) {
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   /* start the position list of the first (smallest) word */
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* a new distinct word: move it down and start its list */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* repeated word: drop its text, append its position to res */
+           pfree(ptr->word);
+           /* stop appending once the list is full or already ends at the maximum position */
+           if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
+               res->pos.apos[0]++; 
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ */
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are first merged with uniqueWORD(); the result is laid out as an
+ * array of WordEntry followed by the data area, where each lexeme (padded
+ * with SHORTALIGN) is followed by a uint16 count and its positions, all
+ * with weight 0.  The word texts, the per-word position arrays and
+ * prs->words itself are freed; the returned value is palloc'd.
+ *
+ * An offset equal to MAXSTRPOS is rejected as well: WordEntry.pos is a
+ * 20-bit field and can hold at most MAXSTRPOS-1, so the original "greater
+ * than" test let one value through that would be truncated to 0.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+   /* first pass: size of the data area */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   /* second pass: fill entries and data */
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* count first, then the positions right behind it */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * SQL-callable: convert a text value (argument 1) into a tsvector using the
+ * configuration whose id is given as argument 0.  Text that yields no words
+ * produces an empty tsvector.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *in = PG_GETARG_TEXT_P(1);
+   PRSTEXT     prs;
+   tsvector       *result = NULL;
+   TSCfgInfo *cfg=findcfg(PG_GETARG_INT32(0)); 
+
+   /* start with room for 32 words */
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+   PG_FREE_IF_COPY(in, 1);
+
+   if (!prs.curwords) {
+       /* nothing parsed: hand back a header-only value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+       PG_RETURN_POINTER(result);
+   }
+
+   result = makevalue(&prs);
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * SQL-callable: like to_tsvector(), but the configuration is given by name
+ * (argument 0) and resolved with name2id_cfg(); argument 1 is the text.
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname=PG_GETARG_TEXT_P(0);
+   Datum cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum vector;
+
+   vector = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(vector);   
+}
+
+/*
+ * SQL-callable: like to_tsvector(), using the configuration returned by
+ * get_currcfg(); argument 0 is the text.
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum cfgid = Int32GetDatum( get_currcfg() );
+   Datum vector;
+
+   vector = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(vector);   
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is text.
+ * Returns the oid of the first such candidate, or InvalidOid if there is
+ * none.  The candidate list returned by the lookup is freed here.
+ */
+static Oid
+findFunc(char *fname) {
+   FuncCandidateList cand, next;
+   Oid found = InvalidOid;
+   List *names=makeList1(makeString(fname));
+
+   cand = FuncnameGetCandidates(names, 1);
+   freeList(names);
+
+   for ( ; cand; cand = next ) {
+       /* grab the link before the node is released */
+       next = cand->next;
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ */
+/*
+ * Row-level BEFORE INSERT/UPDATE trigger that fills a tsvector column.
+ *
+ * Trigger arguments: tgargs[0] names the tsvector column to set; every
+ * further argument is either the name of a character-type column whose
+ * value is parsed, or - when no such column exists - the name of a
+ * function taking text, which is then applied to the values of the columns
+ * named after it.  The configuration returned by get_currcfg() is used.
+ *
+ * Returns the modified tuple; errors are raised with elog(ERROR).
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   /* only valid as a row-level BEFORE trigger on INSERT or UPDATE */
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* first argument: the column that receives the tsvector */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name; it stays
+            * in effect for the columns named after it */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /* no separate detoasted copy was made: remember that, so the
+            * pfree below is skipped for this value */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free only a copy made by detoasting, never the tuple's own data */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store an empty (header-only) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Directory entry for one lexeme, packed into 32 bits:
+ *   haspos - set when a position list follows the lexeme text
+ *   len    - length of the lexeme in bytes
+ *   pos    - offset of the lexeme text within the data area
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* 2^11 and 2^20: one more than the largest value the len / pos fields hold */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme, packed into 16 bits: a 2-bit weight
+ * (printed as D/none=0, C=1, B=2, A=3 by the I/O routines) and a 14-bit
+ * position.
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+/* 2^14: one more than the largest position the pos field holds */
+#define MAXENTRYPOS    (1<<14)
+/* limit on the number of positions kept per lexeme */
+#define MAXNUMPOS  256
+/* clamp a position to the largest storable value, MAXENTRYPOS-1 */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * On-disk/in-memory form of a tsvector value:
+ *   len  - total size of the value in bytes
+ *   size - number of WordEntry elements
+ *   data - 'size' WordEntry elements followed by the data area holding the
+ *          lexeme texts and, for entries with haspos set, a uint16 count
+ *          and that many WordEntryPos right after the SHORTALIGNed text
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Layout helpers.  Every macro argument is parenthesized so that
+ * expressions (e.g. "cur-data" or "a+b") can be passed safely.
+ */
+/* size of the two int4 header fields */
+#define DATAHDRSIZE (sizeof(int4)*2)
+/* total size of a value with x entries and lenstr bytes of data area */
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+/* start of the WordEntry array */
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+/* start of the data area (depends on the current value of ->size) */
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* size of the data area in bytes */
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* address of the uint16 position count of entry e */
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+/* number of positions of entry e, 0 when it has none */
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+/* first WordEntryPos of entry e (just past the count) */
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Entry as collected while parsing input: the directory entry plus, when
+ * entry.haspos is set, a palloc'd position array whose first slot holds the
+ * number of positions (as uint16) followed by the positions themselves.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * State of the tokenizer used by gettoken_tsvector().
+ */
+typedef struct
+{
+   char       *prsbuf;        /* read cursor in the input string */
+   char       *word;          /* palloc'd buffer receiving the current lexeme */
+   char       *curpos;        /* write cursor inside word */
+   int4        len;           /* allocated size of word */
+   int4        state;         /* current state of the state machine */
+   int4        alen;          /* allocated slots in pos; 0 = no position info */
+   WordEntryPos    *pos;      /* positions of the lexeme; slot 0 holds the count */
+   bool        oprisdelim;    /* operator characters end a word; no position list is parsed */
+}  TI_IN_STATE;
+
+/* returns 1 when a lexeme was read into state->word, 0 at end of input */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>     /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * SQL-callable: return a copy of the tsvector with all position
+ * information removed; only the lexemes are kept.
+ *
+ * Both loop conditions were mangled to "isize" in this copy of the patch
+ * (the "<in->" part was lost); they are restored to iterate over all
+ * in->size entries.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* data area of the result: lexeme texts only, each padded */
+   for(i=0;i<in->size;i++) 
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * SQL-callable: return a copy of the tsvector (argument 0) in which every
+ * position carries the weight named by the character in argument 1
+ * ('a'..'d', case-insensitive; a=3, b=2, c=1, d=0).  Any other character
+ * raises an error.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int i,npos;
+   int w=0;
+
+   switch(tolower(cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   /* work on a private copy, the layout stays the same */
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+
+   entry=ARRPTR(out);
+   for(i=out->size; i>0; i--, entry++) {
+       npos=POSDATALEN(out,entry);
+       if ( npos == 0 )
+           continue;
+       for(p=POSDATAPTR(out,entry); npos>0; npos--, p++)
+           p->weight=w;
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two lexemes that live in (possibly different) data areas:
+ * shorter lexemes sort first, equal-length ones by strncmp of their bytes.
+ * ptra/ptrb are the data areas the offsets in a/b refer to.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position list of
+ * destptr (an entry of dest), shifting each position by maxpos and clamping
+ * it with LIMITPOS; weights are copied unchanged.
+ *
+ * If destptr has no position list yet, its count is started at 0.
+ * Appending stops when the destination holds MAXNUMPOS positions or its
+ * last position is already the maximum (MAXENTRYPOS-1).  destptr->haspos
+ * is set when at least one position was added.
+ *
+ * Returns the number of positions added.
+ *
+ * NOTE(review): the for-loop header was cut off after "i" in this copy of
+ * the patch.  The condition below is reconstructed from the surrounding
+ * code and the limits used elsewhere in this module - confirm against the
+ * original source.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+
+/*
+ * SQL-callable: concatenate two tsvectors.
+ *
+ * The entries of both inputs are merged in compareEntry() order.  Positions
+ * taken from the second argument are shifted by the largest position found
+ * in the first one (via add_pos), so the second document logically follows
+ * the first.  A lexeme present in both inputs yields a single entry whose
+ * position list holds the positions of both.
+ *
+ * The result is allocated with the summed size of both inputs and zeroed,
+ * filled using a data-area start computed for in1->size + in2->size
+ * entries, and finally compacted: size and len are corrected and the data
+ * area is moved down if fewer entries were produced.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in the first vector */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* worst case: no common lexemes, so the summed size is always enough */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   /* merge while both inputs still have entries */
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* positions of the first vector are copied verbatim */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* positions of the second vector are shifted by maxpos; the
+                * count starts at 0 because out was zeroed above */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one entry, both position lists */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count slot is already there, only positions are added */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* leftovers of the first vector */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* leftovers of the second vector */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /* fix up the header for the real entry count and close the gap between
+    * the entry array and the data area, if any */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- Be careful!
+-- This script removes tsearch2 from the database. It drops all indices,
+-- triggers and columns that use the types defined in tsearch2.sql.
+-- Everything runs inside one transaction (BEGIN ... END).
+
+
+-- GiST operator class for tsvector
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregates
+DROP AGGREGATE stat(tsvector);
+
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- data types (CASCADE also drops the objects that depend on them)
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- functions that are dropped explicitly
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the token id
+ * from deflex.h (1..LASTNUM).  Slot 0 is an empty placeholder because the
+ * ids start at 1.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token id from deflex.h
+ * (1..LASTNUM), in the same order as lex_descr[].  Slot 0 is an empty
+ * placeholder because the ids start at 1.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Remember: LASTNUM must stay equal to the highest token id defined below,
+ * and lex_descr[] / tok_alias[] need one entry per id (plus slot 0).
+ */
+#define LASTNUM        23
+
+/* token ids returned by the scanner; ids start at 1 */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* per-token description and alias tables, indexed by token id */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Interface of the flex-generated word scanner (parser.l).
+ *
+ * FILE is used below but <stdio.h> is not included here, so the including
+ * file must already have it in scope.
+ */
+
+/*
+ * Text of the most recently scanned token.  Declared extern: the one real
+ * definition (with initializer) lives in parser.l, so a plain definition
+ * here would give every includer its own competing copy of the symbol.
+ */
+extern char      *token;
+/*
+ * Length of the most recently scanned token.
+ * NOTE(review): this is still a tentative definition, because parser.l has
+ * no definition of its own for tokenlen and relies on this line.  To finish
+ * the cleanup, add "int tokenlen = 0;" to parser.l and make this extern too.
+ */
+int            tokenlen;
+/* scan the next token; returns its token id */
+int            tsearch2_yylex(void);
+/* start scanning a string of the given length */
+void       start_parse_str(char *, int);
+/* start scanning a file handle; the int is a read limit, 0 means unlimited */
+void       start_parse_fh(FILE *, int);
+/* release the scanner state */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: every three-argument
+ * fprintf(file, fmt, msg) in the generated scanner is rerouted to
+ * ts_error(ERROR, fmt, msg); the file argument is dropped.
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* use the postgres allocation functions instead of the libc ones */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the current token */
+char *s     = NULL;  /* copy used to return the WHOLE hyphenated word (or URL) */
+
+YY_BUFFER_STATE buf = NULL; /* buffer being parsed; needed for parsing from a string */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next read from the filehandle */
+
+/*
+ * Redefine the input macro so that reads can be limited in length.
+ *
+ * Interactive buffers are read one character at a time up to a newline,
+ * EOF or max_size.  Otherwise: lrlimit == 0 reports end of input (YY_NULL);
+ * lrlimit > 0 reads at most min(lrlimit, max_size) bytes and reduces
+ * lrlimit by that amount; a negative lrlimit reads max_size bytes.
+ *
+ * Note the indentation below is misleading: the "else" covers only the
+ * single assignment "bytestoread = max_size;", so the fread() test runs
+ * in both the limited and the unlimited case.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char: any byte with the high bit set (octal 200-377) */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+/* dot-separated labels ending in an alphabetic label; URI is the allowed path/query character set */
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after parsing: frees the saved copy of a composite token (if one
+ * is held in s) and deletes the current flex buffer.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Start parsing from a string: str points at the text and limit is the
+ * number of bytes to scan.  Any buffer left over from a previous parse is
+ * released first.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL ) {
+       end_parse();
+   }
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Start parsing from a file handle.  limit is stored in lrlimit as the
+ * number of bytes that may be read; a limit of 0 is stored as -1, which
+ * means unlimited.  Any buffer left over from a previous parse is released
+ * first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL ) {
+       end_parse();
+   }
+
+   if ( limit != 0 )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the parser stored in pg_ts_parser under the given oid.
+ *
+ * *prs is zeroed first.  The row is fetched through SPI with a plan that is
+ * prepared on first use and kept in plan_getparser.  The start, nexttoken,
+ * end and headline functions are resolved into FmgrInfo structures in
+ * TopMemoryContext; the lextype function is stored as a plain oid.
+ *
+ * Reports an error through ts_error(ERROR, ...) if the plan cannot be
+ * prepared, the query fails, or no parser has this oid.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       /* columns 1..5 follow the select list above */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* an Oid is unsigned: print it with %u so large oids are not shown negative */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/*
+ * Cache of the parsers that have already been loaded.
+ */
+typedef struct {
+   WParserInfo *last_prs;  /* entry returned by the most recent lookup */
+   int     len;            /* number of entries used in list */
+   int     reallen;        /* number of entries allocated for list */
+   WParserInfo *list;      /* loaded parsers, kept sorted by prs_id */
+   SNMap       name2id_map;    /* cache: parser name -> parser oid */
+} PrsList;
+
+/* the single cache instance, initially empty */
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole parser cache: releases the name-to-id map and the parser
+ * list, then zeroes PList so the next lookup starts from scratch.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &PList.name2id_map );
+
+   if ( PList.list != NULL ) {
+       free( PList.list );
+   }
+
+   memset( &PList, 0, sizeof(PList) );
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is an
+ * unsigned type in PostgreSQL, so the difference of two ids that are more
+ * than INT_MAX apart would come out with the wrong sign once it is
+ * converted to int, giving an inconsistent ordering.
+ *
+ * Returns -1, 0 or 1.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   return ( ida > idb ) ? 1 : 0;
+}
+
+/*
+ * Return the cached WParserInfo for parser oid `id`, loading it with
+ * init_prs() on first use.
+ *
+ * Lookup order: the entry returned by the previous call, then a binary
+ * search of the sorted cache, then the catalog.  The returned pointer
+ * points into PList.list and may be invalidated by a later call that grows
+ * or re-sorts the list.
+ *
+ * Errors are reported through ts_error(ERROR, ...): from init_prs() when the
+ * parser cannot be loaded, or "No memory" when the list cannot grow.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo key;
+   WParserInfo newprs;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* already used prs */
+   key.prs_id=id;
+   if ( PList.len != 0 ) {
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /*
+    * last chance: load the parser from the catalog.
+    *
+    * Load into a local copy first.  init_prs() zeroes its target and then
+    * may raise an error; if the target were an unused slot of the list with
+    * last_prs already pointing at it, the cache would be left referring to
+    * a zeroed entry (prs_id == InvalidOid) that a later lookup could return.
+    */
+   PList.last_prs = NULL;
+   init_prs(id, &newprs);
+
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+   PList.list[PList.len]=newprs;
+   PList.len++;
+
+   /* keep the list sorted for bsearch; the new entry may move, so look it up again */
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return PList.last_prs;
+}
+
+/* saved SPI plan for the name -> oid query; prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into its oid in pg_ts_parser.
+ *
+ * The name-to-id cache in PList.name2id_map is consulted first; a non-zero
+ * cached id is returned directly.  Otherwise the oid is fetched through SPI
+ * and added to the cache.  Reports an error through ts_error(ERROR, ...) if
+ * the plan cannot be prepared, the query fails, or no parser has this name.
+ */
+Oid
+name2id_prs(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   /* presumably 0 when the name is not cached - TODO confirm against snmap.c */
+   Oid id=findSNMap_t( &(PList.name2id_map), name );
+   
+   if ( id ) 
+       return id;
+   
+
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 )
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   SPI_finish();
+   /* remember the mapping for later calls */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* multi-call state of the token_type* set-returning functions */
+typedef struct {
+   int     cur;        /* index of the next entry of list to return */
+   LexDescr    *list;  /* token type descriptions; ends at the entry with lexid == 0 */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type* functions.
+ *
+ * Looks up the parser, calls its lextype function to obtain the LexDescr
+ * list, stores that in a TypeStorage kept in funcctx->user_fctx, and builds
+ * the tuple slot and attribute metadata from the "tokentype" relation.
+ * The allocations are made in the multi-call memory context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   TypeStorage     *st;
+   WParserInfo *prs = findprs(prsid); 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->cur=0;
+   st->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
+   );
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the token_type* functions.
+ *
+ * Builds one (lexid, alias, descr) tuple from the current list entry, frees
+ * that entry's alias and descr strings, and advances.  When the entry with
+ * lexid == 0 is reached (or there is no list), the stored state is freed
+ * and (Datum)0 is returned, which the callers treat as "no more rows".
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage     *st;
+
+   st=(TypeStorage*)funcctx->user_fctx;
+   if (  st->list && st->list[st->cur].lexid ) {
+       Datum result;
+       char* values[3];
+       char    txtid[16];
+       HeapTuple    tuple;
+
+       /* the tuple is built from C strings, so the id is formatted as text */
+       values[0]=txtid;
+       sprintf(txtid,"%d",st->list[st->cur].lexid);
+       values[1]=st->list[st->cur].alias;
+       values[2]=st->list[st->cur].descr;
+
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       pfree(values[1]);
+       pfree(values[2]);
+       st->cur++;
+       return result;
+   } else {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+   }
+   return (Datum)0;
+}
+
+/*
+ * token_type(parser oid): set-returning function that lists the token types
+ * of the parser with the given oid, one row per call.
+ */
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) { 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call means the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * token_type(parser name): same as token_type(), but the parser is given by
+ * name and resolved with name2id_prs().
+ */
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call means the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * token_type(): same as token_type(), but for the current parser.  If no
+ * current parser has been selected yet, the parser named "default" is
+ * looked up and becomes the current one.
+ */
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call means the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * set_curprs(parser oid): make the given parser the current one.  The
+ * parser is looked up first, so an unknown oid raises an error before the
+ * current parser is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid prsid = PG_GETARG_OID(0);
+
+   findprs( prsid );
+   current_parser_id = prsid;
+
+   PG_RETURN_VOID();
+}
+
+/*
+ * set_curprs(parser name): resolve the name with name2id_prs() and delegate
+ * to set_curprs() with the resulting oid.
+ */
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+    
+        DirectFunctionCall1(
+                set_curprs,
+                ObjectIdGetDatum( name2id_prs(name) )
+        );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+/* one parsed token: its type id and a NUL-terminated copy of its text */
+typedef struct {
+   int type;
+   char    *lexem;
+} LexemEntry;
+
+/* multi-call state of the parse* set-returning functions */
+typedef struct {
+   int cur;            /* index of the next entry of list to return */
+   int len;            /* allocated size while filling, number of tokens afterwards */
+   LexemEntry  *list;  /* the collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse* functions.
+ *
+ * Runs the whole text through the parser with oid `prsid`: calls its start
+ * function, collects every token (type plus a NUL-terminated copy of its
+ * text) until the getlexeme function returns 0, then calls its end
+ * function.  The collected tokens are stored in a PrsStorage kept in
+ * funcctx->user_fctx, and the tuple slot and attribute metadata are built
+ * from the "tokenout" relation.  The allocations are made in the multi-call
+ * memory context.
+ *
+ * prsid is declared as Oid, matching setup_firstcall() and findprs(); every
+ * caller passes an Oid.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text payload (varlena header excluded) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* a token type of 0 marks the end of input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* double the list when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* lex is not NUL-terminated: copy llen bytes and terminate the copy */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the number of tokens and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the parse* functions.
+ *
+ * Builds one (type, lexem) tuple from the current token, frees that token's
+ * text and advances.  When all tokens have been returned, the stored state
+ * is freed and (Datum)0 is returned, which the callers treat as "no more
+ * rows".
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *st;
+
+   st=(PrsStorage*)funcctx->user_fctx;
+   if (  st->cur < st->len ) {
+       Datum result;
+       char* values[2];
+       char    tid[16];
+       HeapTuple    tuple;
+
+       /* the tuple is built from C strings, so the type id is formatted as text */
+       values[0]=tid;
+       sprintf(tid,"%d",st->list[st->cur].type);
+       values[1]=st->list[st->cur].lexem;
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       pfree(values[1]);
+       st->cur++;
+       return result;
+   } else {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+   }
+   return (Datum)0;
+}
+
+           
+
+/*
+ * parse(parser oid, text): set-returning function that returns the tokens
+ * the given parser produces for the text, one row per call.
+ */
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* all tokens are collected here; txt is not needed afterwards */
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from prs_process_call means all tokens were returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * parse(parser name, text): same as parse(), but the parser is given by
+ * name and resolved with name2id_prs().
+ */
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* all tokens are collected here; name and txt are not needed afterwards */
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from prs_process_call means all tokens were returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * parse(text): same as parse(), but with the current parser.  If no current
+ * parser has been selected yet, the parser named "default" is looked up and
+ * becomes the current one.
+ */
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       /* all tokens are collected here; txt is not needed afterwards */
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from prs_process_call means all tokens were returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * headline(cfg oid, text, query [, options]): build a headline for the text
+ * with respect to the query.
+ *
+ * The text is parsed with hlparsetext() under the given configuration, the
+ * headline function of that configuration's parser is called on the parsed
+ * words, and genhl() produces the resulting text.  The options argument is
+ * optional; when it is absent or a NULL pointer, NULL is passed on.
+ */
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /*
+    * prs was zero-filled above and only the parser's headline function can
+    * set startsel/stopsel, so free them only if they were actually set.
+    */
+   if ( prs.startsel )
+       pfree(prs.startsel);
+   if ( prs.stopsel )
+       pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+/*
+ * headline(cfg name, text, query [, options]): resolve the configuration
+ * name with name2id_cfg() and delegate to headline().  When the options
+ * argument is absent, a NULL pointer datum is passed in its place.
+ */
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg=PG_GETARG_TEXT_P(0);
+
+   Datum out=DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);   
+}
+
+/*
+ * headline(text, query [, options]): delegate to headline() with the
+ * configuration returned by get_currcfg().  When the options argument is
+ * absent, a NULL pointer datum is passed in its place.
+ */
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   PG_RETURN_DATUM(DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
+   ));
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* one loaded parser, as described by a row of pg_ts_parser */
+typedef struct {
+   Oid prs_id;                 /* oid of the pg_ts_parser row */
+   FmgrInfo start_info;        /* prs_start function */
+   FmgrInfo getlexeme_info;    /* prs_nexttoken function */
+   FmgrInfo end_info;          /* prs_end function */
+   FmgrInfo headline_info;     /* prs_headline function */
+   Oid lextype;                /* oid of the prs_lextype function */
+   void *prs;                  /* parser state returned by the start function */
+} WParserInfo;
+
+/* load the parser with the given oid from pg_ts_parser into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached parser for the oid, loading it on first use */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* drop the parser cache */
+void   reset_prs(void);
+
+
+/* description of one token type; a list of these ends at lexid == 0 */
+typedef struct {
+   int lexid;      /* token type id */
+   char    *alias; /* short name */
+   char    *descr; /* human-readable description */
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_lextype: build the list of lexeme types of the default parser.
+ * Returns a palloc'd array of LASTNUM+1 LexDescr entries: entries
+ * 0..LASTNUM-1 describe lexeme ids 1..LASTNUM (alias and description
+ * are copied from tok_alias[] and lex_descr[]); the last entry has
+ * lexid 0 and terminates the list (only its lexid is set).
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr    *list=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   LexDescr    *cur=list;
+   int id;
+
+   for(id=1;id<=LASTNUM;id++,cur++) {
+       cur->lexid = id;
+       cur->alias = pstrdup(tok_alias[id]);
+       cur->descr = pstrdup(lex_descr[id]);
+   }
+
+   /* cur now points at list[LASTNUM], the terminating entry */
+   cur->lexid=0;
+
+   PG_RETURN_POINTER(list);
+}
+
+/*
+ * prsd_start: begin parsing a string with the default parser.
+ * Argument 0 is a pointer to the text to parse, argument 1 its int32
+ * length; both are handed to start_parse_str().  Always returns a
+ * NULL pointer.
+ */
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char    *str=(char*)PG_GETARG_POINTER(0);
+   int32   len=PG_GETARG_INT32(1);
+
+   start_parse_str( str, len );
+   PG_RETURN_POINTER(NULL);
+}
+
+/*
+ * prsd_getlexeme: fetch the next lexeme from the default parser.
+ * Argument 0 is not used.  Argument 1 (char **) receives the value of
+ * the global `token`, argument 2 (int *) the value of `tokenlen`, both
+ * read after tsearch2_yylex() has run.  Returns the int32 value
+ * produced by tsearch2_yylex().
+ */
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char    **tokptr=(char**)PG_GETARG_POINTER(1);
+   int *lenptr=(int*)PG_GETARG_POINTER(2);
+   int lextype;
+
+   /* run the lexer first: it is what updates token / tokenlen */
+   lextype=tsearch2_yylex();
+
+   *tokptr = token;
+   *lenptr = tokenlen;
+   PG_RETURN_INT32(lextype);
+}
+
+/*
+ * prsd_end: finish a parse started with prsd_start() by calling
+ * end_parse().  Argument 0 is not used; nothing is returned.
+ */
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   end_parse();
+
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class predicates used while choosing a headline fragment.
+ * The numbers are lexeme type ids - presumably those of the default
+ * parser (wordparser/deflex.h); verify there before changing any.
+ */
+#define LEAVETOKEN(x)      ( (x)==12 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+
+#define IDIGNORE(x)        ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* types whose words get the "replace" flag in prsd_headline() */
+#define HLIDIGNORE(x)      ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* types that are not counted as words when a fragment is measured */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* types prsd_headline() tries not to end a fragment on */
+#define NOENDTOKEN(x)      ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* A run of `len` consecutive headline words, starting at `words`. */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * checkcondition_HL: callback handed to TS_execute() by hlCover().
+ * checkval points to an hlCheck; the query operand `val` counts as
+ * present when some word of that run has `val` as its item.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window=(hlCheck*)checkval;
+   int idx=0;
+
+   while ( idx < window->len ) {
+       if ( window->words[idx].item==val )
+           return true;
+       idx++;
+   }
+
+   return false;
+}
+
+
+/*
+ * hlCover: find the next "cover", i.e. a run of words prs->words[*p..*q]
+ * for which the query evaluates to true.
+ *
+ * On entry *p is the word index to start searching from.  On success
+ * the function returns true with *p / *q set to the first / last word
+ * index of the cover.  It returns false when no query operand occurs
+ * at or after the start position (or no candidate satisfies the
+ * query); *p and *q are then not meaningful.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+
+   /*
+    * -1 means "no operand found yet".  0 must not be used for that:
+    * word index 0 is a legitimate position for a match.
+    */
+   *q=-1;
+   *p=0x7fffffff;
+
+   /* right edge: the furthest "first occurrence" over all operands */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q<0 )
+       return false;
+
+   /* left edge: the nearest "last occurrence at or before *q" */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* candidate does not satisfy the query: retry one word further */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+/*
+ * prsd_headline: choose the fragment of a parsed text to show as a
+ * headline and flag its words.
+ *
+ * Arguments: 0 - HLPRSTEXT* with the parsed words, 1 - text* options
+ * (may be NULL), 2 - QUERYTYPE* query.
+ *
+ * Recognised option keys (case-insensitive): MaxWords, MinWords,
+ * ShortWord, StartSel, StopSel.  An ERROR is raised unless
+ * 0 < MinWords < MaxWords and ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is trimmed or extended towards the
+ * MinWords..MaxWords range and the best candidate is kept; if there is
+ * no cover at all, the fragment starts at word 0 and runs until
+ * MinWords words have been counted.  Words of the chosen fragment get
+ * `in` set, plus `selected` (query words), `skip` (repeated words) and
+ * `replace` (HLIDIGNORE types).  Returns prs.
+ */
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults; overridable from opt, as are the start and end tags */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this candidate if it is the first one, if it holds more
+        * query words and ends on an acceptable word, or if it ends on
+        * an acceptable word while the current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover found: fall back to the beginning of the text */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* default selection tags when the options did not supply any */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom}}}}}}}
+    "http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/">[tsearch
+    documentation home].
+
+    ACKNOWLEDGEMENTS
+
+    Robert John Shepherd originally wrote this documentation for
+    the previous version of tsearch module (v1) included with the
+    postgres release. I took his documentation and updated it to
+    comply with the tsearch2 modifications.
+
+    Robert's original acknowledgements:
+
+    "Thanks to Oleg Bartunov for taking the time to answer many
+    of my questions regarding this module, and also to Teodor
+    Sigaev for clearing up the process of making your own
+    dictionaries. Plus of course a big thanks to the pair of them
+    for writing this module in the first place!"
+
+    I would also like to extend my thanks to the developers, and
+    Oleg Bartunov for all of his direction and help with the new
+    features of tsearch2.
+
+    OVERVIEW
+
+    MS-SQL provides a full text indexing (FTI) system which
+    enables the fast searching of text based fields, very useful
+    for websites (and other applications) that require a results
+    set based on key words. PostgreSQL ships with a contributed
+    module called tsearch2, which implements a special type of
+    index that can also be used for full text indexing. Further
+    more, unlike MS' offering which requires regular incremental
+    rebuilds of the text indexes themselves, tsearch2 indexes are
+    always up-to-date and keeping them so induces very little
+    overhead.
+
+    Before we get into the details, it is recommended that you
+    have installed and tested PostgreSQL, are reasonably familiar
+    with databases, the SQL query language and also understand the
+    basics of connecting to PostgreSQL from the local shell. This
+    document isn't intended for the complete PostgreSQL newbie, but
+    anyone with a reasonable grasp of the basics should be able to
+    follow it.
+
+    INSTALLATION
+
+    Starting with PostgreSQL version 7.4 tsearch2 is now
+    included in the contrib directory with the PostgreSQL sources.
+    contrib/tsearch2 is where you will find everything needed to
+    install and use tsearch2. Please note that tsearch2 will also
+    work with PostgreSQL version 7.3.x, but it is not the module
+    included with the source distribution. You will have to
+    download the module separately and install it in the same
+    fashion.
+
+    I installed the tsearch2 module to a PostgreSQL 7.3 database
+    from the contrib directory without squashing the original (old)
+    tsearch module. What I did was move the modules tsearch src
+    directory into the contrib tree under the name tsearchV2.
+
+    Step one is to download the tsearch V2 module :
+
+    
+    "http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">[http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/]
+    (check Development History for latest stable version !)
+    
+        tar -zxvf tsearch-v2.tar.gz
+        mv tsearch2 PGSQL_SRC/contrib/
+        cd PGSQL_SRC/contrib/tsearch2
+
+
+    If you are installing from PostgreSQL version 7.4 or higher,
+    you can skip those steps and just change to the
+    contrib/tsearch2 directory in the source tree and continue from
+    there.
+
+    Then continue with the regular building and installation
+    process
+    
+        gmake
+        gmake install
+        gmake installcheck
+
+
+    That is pretty much all you have to do, unless of course you
+    get errors. However if you get those, you better go check with
+    the mailing lists over at 
+    "http://www.postgresql.org">http://www.postgresql.org or
+    
+    "http://openfts.sourceforge.net/">http://openfts.sourceforge.net/
+    since it's never failed for me.
+
+    The directory in contrib/ and the directory from the
+    archive are both called tsearch2. Tsearch2 is completely incompatible
+    with the previous version of tsearch. This means that both
+    versions can be installed into a single database, and migration
+    to the new version may be much easier.
+
+    NOTE: the previous version of tsearch found in the
+    contrib/tsearch directory is deprecated. Although it is still
+    available and included within PostgreSQL version 7.4, it will
+    be removed in version 7.5.
+
+    ADDING TSEARCH2 FUNCTIONALITY TO A DATABASE
+
+    We should create a database to use as an example for the
+    remainder of this file. We can call the database "ftstest". You
+    can create it from the command line like this:
+    
+        #createdb ftstest
+
+
+    If you thought installation was easy, this next bit is even
+    easier. Change to the PGSQL_SRC/contrib/tsearch2 directory and
+    type:
+    
+        psql ftstest < tsearch2.sql
+
+
+    The file "tsearch2.sql" holds all the wonderful little
+    goodies you need to do full text indexing. It defines numerous
+    functions and operators, and creates the needed tables in the
+    database. There will be 4 new tables created after running the
+    tsearch2.sql file : pg_ts_dict, pg_ts_parser, pg_ts_cfg,
+    pg_ts_cfgmap are added.
+
+    You can check out the tables if you like:
+    
+        #psql ftstest
+        ftstest=# \d
+                    List of relations
+         Schema |     Name     | Type  |  Owner
+        --------+--------------+-------+----------
+         public | pg_ts_cfg    | table | kopciuch
+         public | pg_ts_cfgmap | table | kopciuch
+         public | pg_ts_dict   | table | kopciuch
+         public | pg_ts_parser | table | kopciuch
+        (4 rows)
+
+
+    TYPES AND FUNCTIONS PROVIDED BY TSEARCH2
+
+    The first thing we can do is try out some of the types that
+    are provided for us. Lets look at the tsvector type provided
+    for us:
+    
+        SELECT 'Our first string used today'::tsvector;
+                        tsvector
+        ---------------------------------------
+         'Our' 'used' 'first' 'today' 'string'
+        (1 row)
+
+
+    The results are the words used within our string. Notice
+    they are not in any particular order. The tsvector type returns
+    a string of space separated words.
+    
+        SELECT 'Our first string used today first string'::tsvector;
+                            tsvector
+        -----------------------------------------------
+         'Our' 'used' 'again' 'first' 'today' 'string'
+        (1 row)
+
+
+    Notice the results string has each unique word ('first' and
+    'string' only appear once in the tsvector value). Which of
+    course makes sense if you are searching the full text ... you
+    only need to know each unique word in the text.
+
+    Those examples were just casting a text field to that of
+    type tsvector. Lets check out one of the new functions created
+    by the tsearch2 module.
+
+    The function to_tsvector has 3 possible signatures:
+    
+        to_tsvector(oid, text);
+        to_tsvector(text, text);
+        to_tsvector(text);
+
+
+    We will use the second method using two text fields. The
+    overloaded methods provide us with a way to specify the way
+    the searchable text is broken up into words (Stemming process).
+    Right now we will specify the 'default' configuration. See the
+    section on TSEARCH2 CONFIGURATION to learn more about this.
+    
+        SELECT to_tsvector('default',
+                           'Our first string used today first string');
+                        to_tsvector
+        --------------------------------------------
+         'use':4 'first':2,6 'today':5 'string':3,7
+        (1 row)
+
+
+    The result returned from this function is of type tsvector.
+    The results came about by this reasoning: All of the words in
+    the text passed in are stemmed, or not used because they are
+    stop words defined in our configuration. Each lower case
+    morphed word is returned with all of the positions in the
+    text.
+
+    In this case the word "Our" is a stop word in the default
+    configuration. That means it will not be included in the
+    result. The word "first" is found at positions 2 and 6
+    (although "Our" is a stop word, its position is maintained).
+    The word(s) positioning is maintained exactly as in the
+    original string. The word "used" is morphed to the word "use"
+    based on the default configuration for word stemming, and is
+    found at position 4. The rest of the results follow the same
+    logic. Just a reminder again ... the order of the 'word'
+    position in the output is not in any kind of order. (ie 'use':4
+    appears first)
+
+    If you want to view the output of the tsvector fields
+    without their positions, you can do so with the function
+    "strip(tsvector)".
+    
+        SELECT strip(to_tsvector('default',
+                     'Our first string used today first string'));
+                    strip
+        --------------------------------
+         'use' 'first' 'today' 'string'
+
+
+    If you wish to know the number of unique words returned in
+    the tsvector you can do so by using the function
+    "length(tsvector)"
+    
+        SELECT length(to_tsvector('default',
+                      'Our first string used today first string'));
+         length
+        --------
+              4
+        (1 row)
+
+
+    Lets take a look at the function to_tsquery. It also has 3
+    signatures which follow the same rationale as the to_tsvector
+    function:
+    
+        to_tsquery(oid, text);
+        to_tsquery(text, text);
+        to_tsquery(text);
+
+
+    Lets try using the function with a single word :
+    
+        SELECT to_tsquery('default', 'word');
+         to_tsquery
+        -----------
+         'word'
+         (1 row)
+
+
+    I call the function the same way I would a to_tsvector
+    function, specifying the 'default' configuration for morphing,
+    and the result is the stemmed output 'word'.
+
+    Lets attempt to use the function with a string of multiple
+    words:
+    
+        SELECT to_tsquery('default', 'this is many words');
+        ERROR:  Syntax error
+
+
+    The function can not accept a space separated string. The
+    intention of the to_tsquery function is to return a type of
+    "tsquery" used for searching a tsvector field. What we need to
+    do is search for one to many words with some kind of logic (for
+    now simple boolean).
+    
+        SELECT to_tsquery('default', 'searching|sentence');
+              to_tsquery
+        ----------------------
+         'search' | 'sentenc'
+        (1 row)
+
+
+    Notice that the words are separated by the boolean logic
+    "OR", the text could contain boolean operators &,|,!,()
+    with their usual meaning.
+
+    You can not use words defined as being a stop word in your
+    configuration. The function will not fail ... you will just get
+    no result, and a NOTICE like this:
+    
+        SELECT to_tsquery('default', 'a|is&not|!the');
+        NOTICE:  Query contains only stopword(s)
+                 or doesn't contain lexem(s), ignored
+         to_tsquery
+        -----------
+        (1 row)
+
+
+    That is a beginning to using the types, and functions
+    defined in the tsearch2 module. There are numerous more
+    functions that I have not touched on. You can read through the
+    tsearch2.sql file built when compiling to get more familiar
+    with what is included.
+
+    INDEXING FIELDS IN A TABLE
+
+    The next stage is to add a full text index to an existing
+    table. In this example we already have a table defined as
+    follows:
+    
+        CREATE TABLE tblMessages
+        (
+                intIndex        int4,
+                strTopic        varchar(100),
+                strMessage      text
+        );
+
+
+    We are assuming there are several rows with some kind of
+    data in them. Any data will do, just do several inserts with
+    test strings for a topic, and a message. here is some test data
+    I inserted. (yes I know it's completely useless stuff ;-) but
+    it will serve our purpose right now).
+    
+        INSERT INTO tblMessages
+               VALUES ('1', 'Testing Topic', 'Testing message data input');
+        INSERT INTO tblMessages
+               VALUES ('2', 'Movie', 'Breakfast at Tiffany\'s');
+        INSERT INTO tblMessages
+               VALUES ('3', 'Famous Author', 'Stephen King');
+        INSERT INTO tblMessages
+               VALUES ('4', 'Political Topic',
+                            'Nelson Mandella is released from prison');
+        INSERT INTO tblMessages
+               VALUES ('5', 'Nursery rhyme phrase',
+                            'Little jack horner sat in a corner');
+        INSERT INTO tblMessages
+               VALUES ('6', 'Gettysburg address quotation',
+                            'Four score and seven years ago'
+                            ' our fathers brought forth on this'
+                            ' continent a new nation, conceived in'
+                            ' liberty and dedicated to the proposition'
+                            ' that all men are created equal');
+        INSERT INTO tblMessages
+               VALUES ('7', 'Classic Rock Bands',
+                            'Led Zeppelin Grateful Dead and The Sex Pistols');
+        INSERT INTO tblMessages
+               VALUES ('8', 'My birth address',
+                            '18 Sommervile road, Regina, Saskatchewan');
+        INSERT INTO tblMessages
+               VALUES ('9', 'Joke', 'knock knock : who\'s there?'
+                                    ' I will not finish this joke');
+        INSERT INTO tblMessages
+               VALUES ('10', 'Computer information',
+                             'My computer is a pentium III 400 mHz'
+                             ' with 192 megabytes of RAM');
+
+
+    The next stage is to create a special text index which we
+    will use for FTI, so we can search our table of messages for
+    words or a phrase. We do this using the SQL command:
+    
+        ALTER TABLE tblMessages ADD idxFTI tsvector;
+
+
+    Note that unlike traditional indexes, this is actually a new
+    field in the same table, which is then used (through the magic
+    of the tsearch2 operators and functions) by a special index we
+    will create in a moment.
+
+    The general rule for the initial insertion of data will
+    follow four steps:
+    
+    1. update table
+    2. vacuum full analyze
+    3. create index
+    4. vacuum full analyze
+
+
+    The data can be updated into the table, the vacuum full
+    analyze will reclaim unused space. The index can be created on
+    the table after the data has been inserted. Having the index
+    created prior to the update will slow down the process. It can
+    be done in that manner, this way is just more efficient. After
+    the index has been created on the table, vacuum full analyze is
+    run again to update postgres's statistics (ie having the index
+    take effect).
+    
+        UPDATE tblMessages SET idxFTI=to_tsvector('default', strMessage);
+        VACUUM FULL ANALYZE;
+
+
+    Note that this only inserts the field strMessage as a
+    tsvector, so if you want to also add strTopic to the
+    information stored, you should instead do the following, which
+    effectively concatenates the two fields into one before being
+    inserted into the table:
+    
+        UPDATE tblMessages
+            SET idxFTI=to_tsvector('default',coalesce(strTopic,'') ||' '|| coalesce(strMessage,''));
+        VACUUM FULL ANALYZE;
+
+
+    Using the coalesce function makes sure this
+    concatenation also works with NULL fields.
+
+    We need to create the index on the column idxFTI. Keep in
+    mind that the database will update the index when some action
+    is taken. In this case we _need_ the index (The whole point of
+    Full Text INDEXING ;-)), so don't worry about any indexing
+    overhead. We will create an index based on the gist function.
+    GiST is an index structure for Generalized Search Tree.
+    
+        CREATE INDEX idxFTI_idx ON tblMessages USING gist(idxFTI);
+        VACUUM FULL ANALYZE;
+
+
+    After you have converted all of your data and indexed the
+    column, you can select some rows to see what actually happened.
+    I will not display output here but you can play around
+    yourselves and see what happened.
+
+    The last thing to do is set up a trigger so every time a row
+    in this table is changed, the text index is automatically
+    updated. This is easily done using:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, strMessage);
+
+
+    Or if you are indexing both strMessage and strTopic you
+    should instead do:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE
+                tsearch2(idxFTI, strTopic, strMessage);
+
+
+    Before you ask, the tsearch2 function accepts multiple
+    fields as arguments so there is no need to concatenate the two
+    into one like we did before.
+
+    If you want to do something specific with columns, you may
+    write your very own trigger function using plpgsql or other
+    procedural languages (but not SQL, unfortunately) and use it
+    instead of tsearch2 trigger.
+
+    You could however call other stored procedures from within
+    the tsearch2 function. Lets say we want to create a function to
+    remove certain characters (like the @ symbol from all
+    text).
+    
+       CREATE FUNCTION dropatsymbol(text) 
+                     RETURNS text AS 'select replace($1, \'@\', \' \');' LANGUAGE SQL;
+
+
+    Now we can use this function within the tsearch2 function on
+    the trigger.
+    
+      DROP TRIGGER tsvectorupdate ON tblmessages;
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, dropatsymbol, strMessage);
+        INSERT INTO tblmessages VALUES (69, 'Attempt for dropatsymbol', 'test@test.com');
+
+
+    If at this point you receive an error stating: ERROR: Can't
+    find tsearch config by locale
+
+    Do not worry. You have done nothing wrong. And tsearch2 is
+    not broken. All that has happened here is that the
+    configuration is setup to use a configuration based on the
+    locale of the server. All you have to do is change your default
+    configuration, or add a new one for your specific locale. See
+    the section on TSEARCH2 CONFIGURATION.
+    
+   SELECT * FROM tblmessages WHERE intindex = 69;
+
+         intindex |         strtopic         |  strmessage   |        idxfti
+        ----------+--------------------------+---------------+-----------------------   
+                69 | Attempt for dropatsymbol | test@test.com | 'test':1 'test.com':2
+        (1 row)
+Notice that the string content was passed through the stored
+procedure dropatsymbol. The '@' character was replaced with a
+single space ... and the output from the procedure was then stored
+in the tsvector column.
+
+    This could be useful for removing other characters from
+    indexed text, or any kind of preprocessing needed to be done on
+    the text prior to insertion into the index.
+
+    QUERYING A TABLE
+
+    There are some examples in the README.tsearch2 file for
+    querying a table. One major difference between tsearch and
+    tsearch2 is the operator ## is no longer available. Only the
+    operator @@ is defined, using the types tsvector on one side
+    and tsquery on the other side.
+
+    Lets search the indexed data for the word "Test". I indexed
+    based on the concatenation of the strTopic, and the
+    strMessage:
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'test'::tsquery;
+         intindex |   strtopic
+        ----------+---------------
+                1 | Testing Topic
+        (1 row)
+
+
+    The only result that matched was the row with a topic
+    "Testing Topic". Notice that the word I search for was all
+    lowercase. Let's see what happens when I query for uppercase
+    "Test".
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'Test'::tsquery;
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    We get zero rows returned. The reason is because when the
+    text was inserted, it was morphed to my default configuration
+    (because of the call to to_tsvector in the UPDATE statement).
+    If there was no morphing done, and the tsvector field(s)
+    contained the word 'Test', a match would have been found.
+
+    Most likely the best way to query the field is to use the
+    to_tsquery function on the right hand side of the @@ operator
+    like this:
+    
+        SELECT intindex, strtopic FROM tblmessages
+               WHERE idxfti @@ to_tsquery('default', 'Test | Zeppelin');
+         intindex |      strtopic
+        ----------+--------------------
+                1 | Testing Topic
+                7 | Classic Rock Bands
+        (2 rows)
+
+
+    That query searched for all instances of "Test" OR
+    "Zeppelin". It returned two rows: the "Testing Topic" row, and
+    the "Classic Rock Bands" row. The to_tsquery function performed
+    the correct morphology upon the parameters, and searched the
+    tsvector field appropriately.
+
+    The last example here relates to searching for a phrase, for
+    example "minority report". This poses a problem with regard to
+    tsearch2, as it doesn't index phrases, only words. But there is
+    a way around which doesn't appear to have a significant impact
+    on query time, and that is to use a query such as the
+    following:
+    
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*men are created equal.*';
+         intindex |           strtopic
+        ----------+------------------------------
+                6 | Gettysburg address quotation
+        (1 row)
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*something that does not exist.*';
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    Of course if you're indexing both strTopic and strMessage, and
+    want to search for this phrase on both, then you will have to
+    get out the brackets and extend this query a little more.
+
+    TSEARCH2 CONFIGURATION
+
+    Some words such as "and", "the", and "who" are automatically
+    not indexed, since they belong to a pre-existing dictionary of
+    "Stop Words" which tsearch2 does not perform indexing on. If
+    someone needs to search for "The Who" in your database, they
+    are going to have a tough time coming up with any results,
+    since both are ignored in the indexes. But there is a
+    solution.
+
+    Lets say we want to add a word into the stop word list for
+    english stemming. We could edit the file
+    :'/usr/local/pgsql/share/english.stop' and add a word to the
+    list. I edited mine to exclude my name from indexing:
+    
+    - Edit /usr/local/pgsql/share/english.stop
+    - Add 'andy' to the list
+    - Save the file.
+
+
+    When you connect to the database, the dict_init procedure is
+    run during initialization. And in my configuration it will read
+    the stop words from the file I just edited. If you were
+    connected to the DB while editing the stop words, you will need
+    to end the current session and re-connect. When you re-connect
+    to the database, 'andy' is no longer indexed:
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+        (1 row)
+
+
+    Originally I would get the result :
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+         'andi':1
+        (1 row)
+
+
+    But since I added it as a stop word, it would be ignored on
+    the indexing. The stop word added was used in the dictionary
+    "en_stem". If I were to use a different configuration such as
+    'simple', the results would be different. There are no stop
+    words for the simple dictionary. It will just convert to lower
+    case, and index every unique word.
+    
+        SELECT to_tsvector('simple', 'Andy andy The the in out');
+                     to_tsvector
+        -------------------------------------
+         'in':5 'out':6 'the':3,4 'andy':1,2
+        (1 row)
+
+
+    All this talk about which configuration to use is leading us
+    into the actual configuration of tsearch2. In the examples in
+    this document the configuration has always been specified when
+    using the tsearch2 functions:
+    
+        SELECT to_tsvector('default', 'Testing the default config');
+        SELECT to_tsvector('simple', 'Example of simple Config');
+
+
+    The pg_ts_cfg table holds each configuration you can use
+    with the tsearch2 functions. As you can see, the ts_name column
+    contains both the 'default' configuration, based on the 'C'
+    locale, and the 'simple' configuration, which is not based on
+    any locale.
+    
+        SELECT * from pg_ts_cfg;
+             ts_name     | prs_name |    locale
+        -----------------+----------+--------------
+         default         | default  | C
+         default_russian | default  | ru_RU.KOI8-R
+         simple          | default  |
+        (3 rows)
+
+
+    Each row in the pg_ts_cfg table contains the name of the
+    tsearch2 configuration, the name of the parser to use, and the
+    locale mapped to the configuration. There is only one parser to
+    choose from the table pg_ts_parser called 'default'. More
+    parsers could be written, but for our needs we will use the
+    default.
+
+    There are 3 configurations installed by tsearch2 initially.
+    If your locale is set to 'en_US' for example (like my laptop),
+    then as you can see there is currently no dictionary configured
+    to use with that locale. You can either set up a new
+    configuration or just use one that already exists. If I do not
+    specify which configuration to use in the to_tsvector function,
+    I receive the following error.
+    
+        SELECT to_tsvector('learning tsearch is like going to school');
+        ERROR:  Can't find tsearch config by locale
+
+
+    We will create a new configuration for use with the server
+    encoding 'en_US'. The first step is to add a new configuration
+    into the pg_ts_cfg table. We will call the configuration
+    'default_english', with the default parser and use the locale
+    'en_US'.
+    
+        INSERT INTO pg_ts_cfg (ts_name, prs_name, locale)
+               VALUES ('default_english', 'default', 'en_US');
+
+
+    We have only declared that there is a configuration called
+    'default_english'. We need to set the configuration of how
+    'default_english' will work. The next step is creating a new
+    dictionary to use. The configuration of the dictionary is
+    completely different in tsearch2. In the prior versions to
+    make changes, you would have to re-compile your changes into
+    the tsearch.so. All of the configuration has now been moved
+    into the system tables created by executing the SQL code from
+    tsearch2.sql
+
+    Let's take a first look at the pg_ts_dict table
+    
+        ftstest=# \d pg_ts_dict
+                Table "public.pg_ts_dict"
+         Column      |  Type   | Modifiers
+        -----------------+---------+-----------
+         dict_name       | text    | not null
+         dict_init       | oid     |
+         dict_initoption | text    |
+         dict_lemmatize  | oid     | not null
+         dict_comment    | text    |
+        Indexes: pg_ts_dict_idx unique btree (dict_name)
+
+
+    The dict_name column is the name of the dictionary, for
+    example 'simple', 'en_stem' or 'ru_stem'. The dict_init column
+    is an OID of a stored procedure to run for initialization of
+    that dictionary, for example 'snb_en_init' or 'snb_ru_init'.
+    The dict_initoption column is used for options passed to the init
+    function for the stored procedure. In the cases of 'en_stem' or
+    'ru_stem' it is a path to a stopword file for that dictionary,
+    for example '/usr/local/pgsql/share/english.stop'. This is
+    however dictated by the dictionary. ISpell dictionaries may
+    require different options. The dict_lemmatize column is another
+    OID of a stored procedure to the function used to lemmatize,
+    for example 'snb_lemmatize'. The dict_comment column is just a
+    comment.
+
+    Next we will configure the use of a new dictionary based on
+    ISpell. We will assume you have ISpell installed on your
+    machine. (in /usr/local/lib)
+
+    First let's register the dictionary(ies) to use from ISpell.
+    We will use the english dictionary from ISpell. We insert the
+    paths to the relevant ISpell dictionary (*.hash) and affixes
+    (*.aff) files. There seems to be some question as to which
+    ISpell files are to be used. I installed ISpell from the latest
+    sources on my computer. The installation installed the
+    dictionary files with an extension of *.hash. Some
+    installations install with an extension of *.dict. As far as I
+    know the two extensions are equivalent. So *.hash ==
+    *.dict.
+
+    We will also continue to use the english word stop file that
+    was installed for the en_stem dictionary. You could use a
+    different one if you like. The ISpell configuration is based on
+    the "ispell_template" dictionary installed by default with
+    tsearch2. We will use the OIDs to the stored procedures from
+    the row where the dict_name = 'ispell_template'.
+    
+        INSERT INTO pg_ts_dict
+               (SELECT 'en_ispell',
+                       dict_init,
+                       'DictFile="/usr/local/lib/english.hash",'
+                       'AffFile="/usr/local/lib/english.aff",'
+                       'StopFile="/usr/local/pgsql/share/english.stop"',
+                       dict_lexize
+                FROM pg_ts_dict
+                WHERE dict_name = 'ispell_template');
+
+
+    Next we need to set up the configuration for mapping the
+    dictionary use to the lexeme parsings. This will be done by
+    altering the pg_ts_cfgmap table. We will insert several rows,
+    specifying the use of the new dictionary we installed and
+    configured for use within tsearch2. There are several types of
+    lexemes for which we want to force the use of the ISpell
+    dictionary.
+    
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lhword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lpart_hword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lword', '{en_ispell,en_stem}');
+
+
+    We have just inserted 3 records to the configuration
+    mapping, specifying that the lexeme types for "lhword,
+    lpart_hword and lword" are to be stemmed using the 'en_ispell'
+    dictionary we added into pg_ts_dict, when using the
+    configuration 'default_english' which we added to
+    pg_ts_cfg.
+
+    There are several other lexeme types used that we do not need
+    to specify as using the ISpell dictionary. We can simply insert
+    values using the 'simple' stemming process dictionary.
+    
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'url', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'host', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'sfloat', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uri', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'int', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'float', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'email', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'word', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlpart_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'part_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlhword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'file', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uint', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'version', '{simple}');
+
+
+    Our addition of a configuration for 'default_english' is now
+    complete. We have successfully created a new tsearch2
+    configuration. At the same time we have also set the new
+    configuration to be our default for en_US locale.
+    
+        SELECT to_tsvector('default_english',
+                           'learning tsearch is like going to school');
+                           to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        SELECT to_tsvector('learning tsearch is like going to school');
+                            to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        (1 row)
+
+
+    In the case that you already have a configuration set for
+    the locale, and you are changing it to your new dictionary
+    configuration, you will have to set the old locale to NULL. If
+    we are using the 'C' locale then we would do this:
+    
+        UPDATE pg_ts_cfg SET locale=NULL WHERE locale = 'C';
+
+
+    That about wraps up the configuration of tsearch2. There is
+    much more you can do with the tables provided. This was just an
+    introduction to get things working rather quickly.
+
+    ADDING NEW DICTIONARIES TO TSEARCH2
+
+    To aid in the addition of new dictionaries to the tsearch2
+    module you can use another additional module in combination
+    with tsearch2. The gendict module is included into tsearch2
+    distribution and is available from gendict/ subdirectory.
+
+    I will not go into detail about installation and
+    instructions on how to use gendict to its fullest extent right
+    now. You can read the README.gendict ... it has all of the
+    instructions and information you will need.
+
+    BACKING UP AND RESTORING DATABASES THAT FEATURE
+    TSEARCH2
+
+    Believe it or not, this isn't as straightforward as it
+    should be, and you will have problems trying to backup and
+    restore any database which uses tsearch2 unless you take the
+    steps shown below. And before you ask, using pg_dumpall will
+    result in failure every time. These took a lot of trial and
+    error to get working, but the process as laid down below has
+    been used a dozen times now in live production environments so
+    it should work fine.
+
+    HOWEVER never rely on anyone else's instructions to backup
+    and restore a database system, always develop and understand
+    your own methodology, and test it numerous times before you
+    need to do it for real.
+
+    To Backup a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Backup any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        pg_dumpall -g > GLOBALobjects.sql
+
+
+    2) Backup the full database schema using pg_dump
+    
+        pg_dump -s DATABASE > DATABASEschema.sql
+
+
+    3) Backup the full database using pg_dump
+    
+        pg_dump -Fc DATABASE > DATABASEdata.tar
+
+
+    To Restore a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Create the blank database
+    
+        createdb DATABASE
+
+
+    2) Restore any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        psql DATABASE < GLOBALobjects.sql
+
+
+    3) Create the tsearch2 objects, functions and operators
+    
+        psql DATABASE < tsearch2.sql
+
+
+    4) Edit the backed up database schema and delete all SQL
+    commands which create tsearch2 related functions, operators and
+    data types, BUT NOT fields in table definitions that specify
+    tsvector types. If you're not sure what these are, they are the
+    ones listed in tsearch2.sql. Then restore the edited schema to
+    the database
+    
+        psql DATABASE < DATABASEschema.sql
+
+
+    5) Restore the data for the database
+    
+        pg_restore -N -a -d DATABASE DATABASEdata.tar
+
+
+    If you get any errors in step 4, it will most likely be
+    because you forgot to remove an object that was created in
+    tsearch2.sql. Any errors in step 5 will mean the database
+    schema was probably restored wrongly.
+  
+
+


diff --git a/contrib/tsearch2/docs/tsearch2-guide.html b/contrib/tsearch2/docs/tsearch2-guide.html

new file mode 100644 (file)

index 0000000..2529480


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-guide.html
@@ -0,0 +1,1057 @@
+
+
+
+
+tsearch2 guide
+
+
+The tsearch2 Guide
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Guide introduces the reader to the PostgreSQL tsearch2 module,
+version 2.
+More formal descriptions of the module's types and functions
+are provided in the tsearch2 Reference,
+which is a companion to this document.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+First we will examine the tsvector and tsquery types
+and how they are used to search documents;
+next, we will use them to build a simple search engine in SQL;
+and finally, we will study the internals of document conversion
+and how you might tune the internals to accommodate various searching needs.
+
+Once you have tsearch2 working with PostgreSQL,
+you should be able to run the examples here exactly as they are typed.
+
+
+Table of Contents
+
+Vectors and Queries

+A Simple Search Engine

+Ranking and Position Weights

+Casting Vectors and Queries

+Parsing and Lexing

+
+
+
+
+Vectors and Queries
+
+
+This section introduces

+the two data types upon which tsearch2 search engines are based,
+and illustrates their interaction using the simplest possible case.
+The complex examples we present later on
+are merely variations and elaborations of this basic mechanism.
+
+
+The tsearch2 module allows you to index documents by the words they contain,
+and then perform very efficient searches
+for documents that contain a given combination of words.
+Preparing your document index involves two steps:
+
+Making a list of the words each document contains.
+ You must reduce each document to a tsvector
+ which lists each word that appears in the document.
+ This process offers many options,
+ because there is no requirement
+ that you must copy words into the vector
+ exactly as they appear in the document.
+ For example,
+ many developers omit frequent and content-free stop words
+ like the to reduce the size of their index;
+ others reduce different forms of the same word
+ (forked, forking, forks)
+ to a common form (fork)
+ to make search results independent of tense and case.
+ Because words are very often stored in a modified form,
+ we use the special term lexemes
+ for the word forms we actually store in the vector.
+Creating an index of the documents by lexeme.
+ This is managed automatically by tsearch2
+ when you create a gist() index
+ on the tsvector column of a table,
+ which implements a form of the Berkeley
+ Generalized Search Tree.
+
+Once your documents are indexed,
+performing a search involves:
+
+Reducing the search terms to lexemes.
+ You must express each search you want to perform
+ as a tsquery specifying a boolean combination of lexemes.
+ Note that tsearch2 only finds exact matches
+ between the lexemes in your query and the ones in each vector —
+ even capitalization counts as a difference
+ (which is why all lexemes are usually kept lowercase).
+ So you must process search words the same way you processed document words;
+ if forking became fork in the document's tsvector,
+ then the search term forking must also become fork
+ or the search will not find the document.
+Retrieving the documents that match the query.
+ Running a SELECT ... WHERE
+ query @@ vector
+ on the table with the vector column
+ will return the documents that match your query.
+Presenting your results.
+ This final stage offers as many options
+ as turning documents into vectors.
+ You can order documents by how well they matched the search terms;
+ create a headline for each document
+ showing some of the phrases in which it uses the search terms;
+ and restrict the number of results retrieved.
+ You will of course want some way to identify each document,
+ so the user can ask for the full text of the ones he wants to read.
+
+And beyond deciding upon rules for turning documents into vectors
+and for presenting search results to users,
+you have to decide where to perform these operations —
+whether one database server
+will parse documents, perform searches, and prepare search results,
+or whether to spread the load of these operations across several machines.
+These are complicated design issues
+which we will explore later;
+in this section and the next,
+we will illustrate what can be accomplished
+using a single database server.
+
+The default tsearch2 configuration,
+which we will learn more about later,
+provides a good example of a process for reducing documents to vectors:
+
+
+=# SELECT set_curcfg('default')
+=# SELECT to_tsvector('The air smells of sea water.')
+             to_tsvector             
+-------------------------------------
+ 'air':2 'sea':5 'smell':3 'water':6
+(1 row)
+
+
+Note the complex relationship between this document and its vector.
+The vector lists only words from the document —
+spaces and punctuation have disappeared.
+Common words like the and of have been eliminated.
+The -s that makes smells a plural has been removed,
+leaving a lexeme that represents the word in its simplest form.
+And finally,
+though the vector remembers the positions in which each word appeared,
+it does not store the lexemes in that order.
+
+Keeping word positions in your vectors is optional, by the way.
+The positions are necessary for the tsearch2 ranking functions,
+which you can use to prioritize documents
+based on how often each document uses the search terms
+and whether they appear in close proximity.
+But if you do not perform ranking,
+or use your own process that ignores the word positions stored in the vector,
+then you can save space by stripping them from your vectors:
+
+
+=# SELECT strip(to_tsvector('The air smells of sea water.'))
+            strip            
+-----------------------------
+ 'air' 'sea' 'smell' 'water'
+(1 row)
+
+
+Now that we have a procedure for creating vectors,
+we can build an indexed table of vectors very simply:
+
+
+=# CREATE TABLE vectors ( vector tsvector )
+=# CREATE INDEX vector_index ON vectors USING gist(vector)
+=# INSERT INTO vectors VALUES (to_tsvector('The path forks here'))
+=# INSERT INTO vectors VALUES (to_tsvector('A crawl leads west'))
+=# INSERT INTO vectors VALUES (to_tsvector('The left fork leads northeast'))
+=# SELECT * FROM vectors
+                  vector                  
+------------------------------------------
+ 'fork':3 'path':2
+ 'lead':3 'west':4 'crawl':2
+ 'fork':3 'lead':4 'left':2 'northeast':5
+(3 rows)
+
+
+Now we can search this collection of document vectors
+using the @@ operator and a tsquery
+that specifies the combination of lexemes we are looking for.
+Note that while vectors simply list lexemes,
+queries always combine them with the operators
+‘&’ and,
+‘|’ or,
+and  ‘!’ not,
+plus parentheses for grouping.
+Some examples of the query syntax:
+
+
+ ‘find documents with the word forks in them’

+ 'forks'
+
+ ‘... with both forks and leads’

+ 'forks & leads'
+
+ ‘... with either forks or leads’

+ 'forks | leads'
+
+ ‘... with either forks or leads,
+  but without crawl’

+ '(forks|leads) & !crawl'
+
+The tsearch2 module
+provides a to_tsquery() function for creating queries
+that uses the same process as to_tsvector() uses
+to reduce words to lexemes.
+For instance,
+it will remove the -s from the plurals in the last example above:
+
+
+=# SELECT to_tsquery('(leads|forks) & !crawl')
+           to_tsquery           
+--------------------------------
+ ( 'lead' | 'fork' ) & !'crawl'
+(1 row)
+
+
+Again,
+this is critically important because the search operator @@
+only finds exact matches
+between the words in a query and the words in a vector;
+if the document vector lists the lexeme fork
+but the query looks for the plural form forks,
+the query would not match that document.
+Thanks to the symmetry between our process
+for producing vectors and queries, however,
+the above searches return correct results:
+
+
+=# SELECT * FROM vectors WHERE vector @@ to_tsquery('(leads|forks) & !crawl')
+                  vector                  
+------------------------------------------
+ 'fork':3 'path':2
+ 'fork':3 'lead':4 'left':2 'northeast':5
+(2 rows)
+
+
+You may want to try the other queries shown above,
+and perhaps invent some of your own.
+
+You should not include stop words in a query,
+since you cannot search for words you have discarded.
+If you throw out the word the when building vectors, for example,
+your index will obviously not know which documents included it.
+The to_tsquery() function will automatically detect this
+and give you an error to prevent this mistake:
+
+
+=# SELECT to_tsquery('the')
+NOTICE:  Query contains only stopword(s) or doesn't contain lexem(s), ignored
+ to_tsquery 
+------------
+ 
+(1 row)
+
+
+But if you ever build vectors and queries using your own routines,
+a possibility we will discuss later,
+then you will need to enforce this rule yourself.
+
+

+Now that you understand how vectors and queries work together,
+you are prepared to tackle many additional topics:
+how to distribute searching across many servers;
+how to customize the process
+by which tsearch2 turns documents and queries into lexemes,
+or use a process of your own;
+and how to sort and display search results to your users.
+But before discussing these detailed questions,
+we will build a simple search engine
+to see how easily its basic features work together.
+
+
+A Simple Search Engine
+
+

+In this section we build a simple search engine out of SQL functions
+that use the vector and query types described in the previous section.
+While this example is simpler
+than a search engine that has to interface with the outside world,
+it will illustrate the basic principles of building a search engine,
+and better prepare you for developing your own.
+
+Building a search engine involves only a few improvements
+upon the rudimentary vector searches described in the last section.
+
+Because the user wants to read documents, not vectors,
+ you must provide some way
+ for the full text of each document to be accessed —
+ either by storing the entire text of each document in the database,
+ or storing an identifier
+ like a URL, file name, or document routing number
+ that lets you fetch the document from other storage.
+You can make it easier for user interface code to refer to each document
+ by providing a unique identifier for each document,
+ perhaps with a SERIAL column.
+Search results should be ordered by relevance.
+ If you leave word positions in your vectors,
+ you can either have PostgreSQL ORDER your results
+ BY a ranking function,
+ or you can fetch the vectors yourself and perform your own sort.
+ If you choose to ignore word positions or strip them from your vectors,
+ you will have to determine relevance yourself,
+ using either the full text of the document
+ or other information about each document you may possess.
+For each document returned by a search,
+ you will usually want to display a summary called a headline
+ that shows short excerpts
+ illustrating how the document uses the query words.
+ Headlines are usually generated from the full text of the document,
+ not from position information in the tsvector,
+ since excerpts lacking stop words, punctuation, and suffixes
+ would not be comprehensible.
+ If you store the full text of each document in the database,
+ headlines can be generated very simply by a tsearch2 function.
+ If you store your documents elsewhere,
+ then you will either have to transmit each document to the database
+ every time you want to run the headline function on it,
+ or use your own headline code outside of the database.
+
+
+We can easily construct a simple search engine
+that accomplishes these goals.
+First we build a table that, for each document,
+stores a unique identifier, the full text of the document,
+and its tsvector:
+
+
+=# CREATE TABLE docs ( id SERIAL, doc TEXT, vector tsvector )
+=# CREATE INDEX docs_index ON docs USING gist(vector);
+
+
+Note that although searches will still work
+on tables where you have neglected
+to create a gist() index over your vectors,
+they will run much more slowly
+since they will have to compare the query
+against every document vector in the table.
+
+Because the table we have created
+stores each document in two different ways —
+both as text and as a vector —
+our INSERT statements must provide the document in both forms.
+While more advanced PostgreSQL programmers
+might accomplish this with a database trigger or rule,
+for this simple example we will use a small SQL function:
+
+
+=# CREATE FUNCTION insdoc(text) RETURNS void LANGUAGE sql AS

+  'INSERT INTO docs (doc, vector) VALUES ($1, to_tsvector($1));'
+
+
+Now, by calling insdoc() several times,
+we can populate our table with documents:
+
+
+=# SELECT insdoc('A low crawl over cobbles leads inward to the west.')
+=# SELECT insdoc('The canyon runs into a mass of boulders -- dead end.')
+=# SELECT insdoc('You are crawling over cobbles in a low passage.')
+=# SELECT insdoc('Cavernous passages lead east, north, and south.')
+=# SELECT insdoc('To the east a low wide crawl slants up.')
+=# SELECT insdoc('You are in the south side chamber.')
+=# SELECT insdoc('The passage here is blocked by a recent cave-in.')
+=# SELECT insdoc('You are in a splendid chamber thirty feet high.')
+
+
+Now we can build a search function.
+Its SELECT statement is based upon
+the same @@ operation illustrated in the previous section.
+But instead of returning matching vectors,
+we return for each document
+its SERIAL identifier, so the user can retrieve it later;
+a headline that illustrates its use of the search terms;
+and a ranking with which we also order the results.
+Our search operation can be coded as a single SELECT statement
+returning its own kind of table row,
+which we call a finddoc_t:
+
+
+=# CREATE TYPE finddoc_t AS (id INTEGER, headline TEXT, rank REAL)
+=# CREATE FUNCTION finddoc(text) RETURNS SETOF finddoc_t LANGUAGE sql AS '

+   SELECT id, headline(doc, q), rank(vector, q)
+     FROM docs, to_tsquery($1) AS q
+     WHERE vector @@ q ORDER BY rank(vector, q) DESC'
+
+
+This function is a rather satisfactory search engine.
+Here is one example search,
+after which the user fetches the top-ranking document itself;
+with similar commands you can try queries of your own:
+
+
+=# SELECT * FROM finddoc('passage|crawl')
+ id |                       headline                        | rank 
+----+-------------------------------------------------------+------
+  3 | <b>crawling</b> over cobbles in a low <b>passage</b>. | 0.19
+  1 | <b>crawl</b> over cobbles leads inward to the west.   |  0.1
+  4 | <b>passages</b> lead east, north, and south.          |  0.1
+  5 | <b>crawl</b> slants up.                               |  0.1
+  7 | <b>passage</b> here is blocked by a recent  cave-in.  |  0.1
+(5 rows)
+=# SELECT doc FROM docs WHERE id = 3
+                       doc                       
+-------------------------------------------------
+ You are crawling over cobbles in a low passage.
+(1 row)
+
+
+While by default the headline() function
+surrounds matching words with <b> and </b>
+in order to distinguish them from the surrounding text,
+you can provide options that change its behavior;
+consult the tsearch2 Reference for more details about
+Headline Functions.
+
+Though a search may match hundreds or thousands of documents,
+you will usually present only ten or twenty results to the user at a time.
+This can be most easily accomplished
+by limiting your query with a LIMIT
+and an OFFSET clause —
+to display results ten at a time, for example,
+you would generate your first page of results
+with LIMIT 10 OFFSET 0,
+your second page
+with LIMIT 10 OFFSET 10,
+your third page
+with LIMIT 10 OFFSET 20,
+and so forth.
+There are two problems with this approach, however.
+
+The first problem is the strain of running the query over again
+for every page of results the user views.
+For small document collections or lightly loaded servers,
+this may not be a problem;
+but the impact can be high
+when a search must repeatedly rank and sort
+the same ten thousand results
+on an already busy server.
+So instead of selecting only one page of results,
+you will probably use LIMIT and OFFSET
+to return a few dozen or few hundred results,
+which you can cache and display to the user one page at a time.
+Whether a result cache rewards your effort
+will depend principally on the behavior of your users —
+how often they even view the second page of results, for instance.
+
+The second issue solved by caching involves consistency.
+If the database is changing while the user browses their results,
+then documents might appear and disappear as they page through them.
+In some cases the user might even miss a particular result —
+perhaps the one they were looking for —
+if, say, its rank improves from 31st to 30th
+after they load results 21–30 but before they view results 31–40.
+While many databases are static or infrequently updated,
+and will not present this problem,
+users searching very dynamic document collections
+might benefit from the stable results that caches yield.
+
+

+Having seen the features of a search engine
+implemented entirely within the database,
+we will learn about some specific tsearch2 features.
+First we will look in more detail at document ranking.
+
+
+Ranking and Position Weights
+
+

+When we built our simple search engine,
+we used the rank() function to order our results.

+Here we describe tsearch2 ranking in more detail.
+
+
+There are two functions with which tsearch2 can rank search results.
+They both use the lexeme positions listed in the tsvector,
+so you cannot rank vectors
+from which these have been removed with strip().
+The rank() function existed in older versions of OpenFTS,
+and has the feature that you can assign different weights
+to words from different sections of your document.
+The rank_cd() function uses a recent technique for weighting results
+but does not allow different weights to be given
+to different sections of your document.
+
+Both ranking functions allow you to specify,
+as an optional last argument,
+whether you want their results normalized —
+whether the rank returned should be adjusted for document length.
+Specifying a last argument of 0 (zero) makes no adjustment;
+1 (one) divides the document rank
+by the logarithm of the document length;
+and 2 divides it by the plain length.
+In all of these examples we omit this optional argument,
+which is the same as specifying zero —
+we are making no adjustment for document length.
+
+The rank_cd() function uses an experimental measurement
+called cover density ranking that rewards documents
+when they make frequent use of the search terms
+that are close together in the document.
+You can read about the algorithm in more detail
+in Clarke et al.,
+ “Relevance Ranking for One to Three Term Queries.”
+An optional first argument allows you to tune their formula;
+for details
+see the section on ranking
+in the Reference.
+
+The rank() function offers more flexibility
+because it pays attention to the weights
+with which you have labelled lexeme positions.
+Currently tsearch2 supports four different weight labels:
+'D', the default weight;
+and 'A', 'B', and 'C'.
+All vectors created with to_tsvector()
+assign the weight 'D' to each position,
+which as the default is not displayed when you print a vector out.
+
+If you want positions with weights other than 'D',
+you have two options:
+either you can author a vector directly through the ::tsvector
+casting operation,
+as described in the following section,
+which lets you give each position whichever weight you want;
+or you can pass a vector through the setweight() function
+which sets all of its position weights to a single value.
+An example of the latter:
+
+
+
+=# SELECT vector FROM docs WHERE id = 3
+                 vector                 
+----------------------------------------
+ 'low':8 'cobbl':5 'crawl':3 'passag':9
+(1 row)
+=# SELECT setweight(vector, 'A') FROM docs WHERE id = 3
+                 setweight                  
+--------------------------------------------
+ 'low':8A 'cobbl':5A 'crawl':3A 'passag':9A
+(1 row)
+
+
+
+Merely changing all of the weights in a vector is not very useful,
+of course,
+since this still results in all words having the same weight.
+But if we parse different parts of a document separately,
+giving each section its own weight,
+and then concatenate the vectors of each part into a single vector,
+the result can be very useful.
+We can construct a simple example
+in which document titles are given greater weight
+than text in the body of the document:
+
+
+
+=# CREATE TABLE tdocs ( id SERIAL, title TEXT, doc TEXT, vector tsvector )
+=# CREATE INDEX tdocs_index ON tdocs USING gist(vector);
+=# CREATE FUNCTION instdoc(text, text) RETURNS void LANGUAGE sql AS

+  'INSERT INTO tdocs (title, doc, vector)
+   VALUES ($1, $2, setweight(to_tsvector($1), ''A'') || to_tsvector($2));'
+
+
+
+Now words from a document title will be weighted differently
+than those in the main text
+if we provide the title and body as separate arguments:
+
+
+
+=# SELECT instdoc('Spendid Chamber',

+ 'The walls are frozen rivers of orange stone.')
+ instdoc 
+---------
+ 
+(1 row)
+=# SELECT vector FROM tdocs
+                                    vector                                    
+------------------------------------------------------------------------------
+ 'wall':4 'orang':9 'river':7 'stone':10 'frozen':6 'chamber':2A 'spendid':1A
+(1 row)
+
+
+
+Note that although the necessity is unusual,
+you can constrain search terms
+to only match words from certain sections
+by following them with a colon
+and a list of the sections in which the word can occur;
+by default this list is 'ABCD'
+so that search terms match words from all sections.
+For example,
+here we search for a word both generally,
+and then looking only for specific weights:
+
+
+
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:A')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:D')
+ title | doc 
+-------+-----
+(0 rows)
+
+
+
+
+
+

+Our examples so far use tsearch2 to parse our documents into vectors.
+When your application needs absolute control over vector content,
+you will want to use direct type casting,
+which is described in the next section.
+
+
+Casting Vectors and Queries
+
+

+While tsearch2 has powerful and flexible ways
+to process documents and turn them into document vectors,
+you will sometimes want to parse documents on your own
+and place the results directly in vectors.
+Here we show you how.
+
+
+In the preceding examples,
+we used the to_tsvector() function
+when we needed a document's text reduced to a document vector.
+We saw that the function stripped whitespace and punctuation,
+eliminated common words,
+and altered suffixes to reduce words to a common form.
+While these operations are often desirable,
+and while in the sections below
+we will gain precise control over this process,
+there are occasions on which
+you want to avoid the changes that to_tsvector() makes to text
+and specify explicitly the words that you want in your vectors.
+Or you may want to create queries directly
+rather than through to_tsquery().
+
+For example,
+you may have already developed your own routine
+for reducing your documents to searchable lexemes,
+and do not want your carefully generated terms altered
+by passing them through to_tsvector().
+Or you might be developing and debugging parsing routines of your own
+that you are not ready to load into the database.
+In either case,
+you will find that direct insertion is easily accomplished
+if you simply follow some simple rules.
+
+Vectors are created directly
+when you cast a string of whitespace-separated lexemes
+to the tsvector type:
+
+
+
+=# select 'the only exit is the way you came in'::tsvector
+                     tsvector                     
+--------------------------------------------------
+ 'in' 'is' 'the' 'way' 'you' 'came' 'exit' 'only'
+(1 row)
+
+
+
+Notice that the conversion interpreted the string
+simply as a list of lexemes to be included in the vector.
+Their order was lost,
+as was the number of times each lexeme appeared.
+You must keep in mind that directly creating vectors with casting
+is not an alternate means of parsing;
+it is a way of directly entering lexemes into a vector without parsing.
+
+Queries can also be created through casting,
+if you separate lexemes with boolean operators
+rather than with whitespace.
+When creating your own vectors and queries,
+remember that the search operator @@
+finds only exact matches between query lexemes and vector lexemes
+—
+if they are not exactly the same string,
+they will not be considered a match.
+
+To include lexeme positions in your vector,
+write the positions exactly the way tsearch2 displays them
+when it prints vectors:
+by following each lexeme with a colon
+and a comma-separated list of integer positions.
+If you list a lexeme more than once,
+then all the positions listed for it are combined into a single list.
+For example,
+here are two ways of writing the same vector,
+depending on whether you mention ‘the’ twice
+or combine its positions into a list yourself:
+
+
+
+=# select 'the:1 only:2 exit:3 is:4 the:5 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+=# select 'the:1,5 only:2 exit:3 is:4 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+
+
+
+Things can get slightly tricky
+if you want to include apostrophes, backslashes, or spaces
+inside your lexemes
+(wanting to include either of the latter would be unusual,
+but they can be included if you follow the rules).
+The main problem is that the apostrophe and backslash
+are important both to PostgreSQL when it is interpreting a string,
+and to the tsvector conversion function.
+You may want to review section
+1.1.2.1,
+“String Constants”
+in the PostgreSQL documentation before proceeding.
+
+When you cast strings directly into vectors:
+
+The string is interpreted as a whitespace-separated list of lexemes,
+ any of which can be suffixed with a colon and a list of positions.
+A lexeme can be quoted by preceding it with an apostrophe,
+ in which case it runs until the next apostrophe;
+ otherwise a lexeme ends with the first whitespace or colon encountered.
+Any character preceded by a backslash,
+ including whitespace, the apostrophe, the colon, and the backslash itself,
+ loses its normal meaning and is treated as a letter.
+ Backslashes are effective
+ both inside and outside of apostrophe-quoted lexemes.
+A lexeme can be suffixed with a list of positions
+ by appending a colon and a comma-separated list of integers,
+ each of which can itself be followed by a letter
+ to designate a position weight
+ (position weights are described in the preceding section).
+
+
+Here are some example strings,
+showing the lexeme you want to insert
+together with the string that the ::tsvector operator
+needs to see,
+and how you would type that string at the PostgreSQL prompt:
+
+
+
+For the lexeme...
+you need the string...
+which you can type as:
+
+nugget
+nugget
+'nugget'
+
+won't
+won't
+'won''t'
+
+pinin'
+pinin'
+'pinin'''
+
+'bout
+\'bout
+'\\''bout'
+
+white mist
+white\ mist
+'white\\ mist'
+
+or:
+'white mist'
+'''white mist'''
+
+won't budge
+won\'t\ budge
+'won\\''t\\ budge'
+
+or:
+'won\'t budge'
+'''won\\''t budge'''
+
+back\slashed
+back\\slashed
+'back\\\\slashed'
+
+
+Remember to use the quoted quoting shown at the right
+only when typing in strings as part of a PostgreSQL query.
+If you are providing strings through a library
+that automatically quotes them
+or provides them in binary form to PostgreSQL,
+then you can use the strings in the middle instead —
+suitably quoted in the language you are using, of course.
+
+Position weights are described in the preceding section
+and can be written exactly as they will be displayed
+when you select a weighted vector:
+
+
+=# select 'weighty:1,3A trivial:2B,4'::tsvector
+           tsvector            
+-------------------------------
+ 'trivial':2B,4 'weighty':1,3A
+(1 row)
+
+
+
+Note that if you are composing SQL queries
+in a scripting language like Perl or Python,
+that itself considers quotes and backslashes special,
+then you may have another quoting layer to deal with
+on top of the two layers already shown above.
+In such cases you may want to write a function
+that performs the necessary quoting for you.
+
+

+Having seen how to create vectors of your own,
+it is time to learn how the native tsearch2 parser
+reduces documents to vectors.
+
+
+Parsing and Lexing
+
+

+The previous section
+described how you can bypass the parser provided by tsearch2
+and populate your table of documents
+with vectors of your own devising.
+But for those interested in the native tsearch2 facilities,
+we present here an overview of how it goes about
+reducing documents to vectors.
+
+
+The to_tsvector() function reduces documents to vectors
+in two stages.
+First, a parser breaks the input document
+into short sequences of text called tokens.
+Each token is usually a word, space, or piece of punctuation,
+though some parsers return larger and more exotic items
+like HTML tags as single tokens.
+Each token returned by the parser
+is either discarded
+or passed to a dictionary that converts it into a lexeme.
+The resulting lexemes are collected into a vector and returned.
+
+The choice of which parser and dictionaries to_tsvector() should use
+is controlled by your choice of configuration.
+The tsearch2 module comes with several configurations,
+and you can define more of your own;
+in fact the creation of a new configuration is illustrated below,
+in the section on position weights.
+
+To learn about parsing in more detail,
+we will study this example:
+
+
+=# select to_tsvector('default',

+     'The walls extend upward for well over 100 feet.')
+                       to_tsvector                        
+----------------------------------------------------------
+ '100':8 'feet':9 'wall':2 'well':6 'extend':3 'upward':4
+(1 row)
+
+
+Unlike the to_tsvector() calls used in the above examples,
+this one specifies the 'default' configuration explicitly.
+When we called to_tsvector() in earlier examples
+with only one argument,
+it used the current configuration,
+which is chosen automatically based on your LOCALE
+if that locale is mentioned in the pg_ts_cfg table
+(which is shown under the first bullet in the description below).
+If your locale is not listed in the table,
+your attempts to use the current configuration will return:
+
+
+ERROR:  Can't find tsearch2 config by locale
+
+
+You can always change the current configuration manually
+by calling the set_curcfg() function
+described in the section on
+Configurations
+in the Reference.
+
+Each configuration serves as an index into two different tables:
+in pg_ts_cfg it determines
+which parser will break our text into tokens,
+and in pg_ts_cfgmap
+it directs each token to a dictionary for processing.
+The steps in detail are:
+
+
+
+First, our text is parsed,
+using the parser listed for our configuration in the pg_ts_cfg table.
+We are using the 'default' configuration,
+so the table tells us to use the 'default' parser:
+
+
+=# SELECT * FROM pg_ts_cfg WHERE ts_name = 'default'
+ ts_name | prs_name | locale 
+---------+----------+--------
+ default | default  | C
+(1 row)
+
+
+So our text will be parsed as though we had called:
+
+
+=# select * from parse('default',

+     'The walls extend upward for well over 100 feet.')
+
+
+This breaks the text into a list of tokens
+which are each labelled with an integer type:
+
+The₁♦₁₂
+walls₁♦₁₂
+extend₁♦₁₂
+upward₁♦₁₂
+for₁♦₁₂
+well₁♦₁₂
+over₁♦₁₂
+100₂₂♦₁₂
+feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')

+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and

+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+}

diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html

new file mode 100644 (file)

index 0000000..df0faa4


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-ref.html
@@ -0,0 +1,448 @@
+
+
+
+
+tsearch2 reference
+
+
+The tsearch2 Reference
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Reference documents the user types and functions
+of the tsearch2 module for PostgreSQL.
+An introduction to the module is provided
+by the tsearch2 Guide,
+a companion document to this one.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+Vectors and Queries
+
+Vectors and queries both store lexemes,
+but for different purposes.
+A tsvector stores the lexemes
+of the words that are parsed out of a document,
+and can also remember the position of each word.
+A tsquery specifies a boolean condition among lexemes.
+
+Any of the following functions with a configuration argument
+can use either an integer id or textual ts_name
+to select a configuration;
+if the option is omitted, then the current configuration is used.
+For more information on the current configuration,
+read the next section on Configurations.
+
+Vector Operations
+
+
+
+ to_tsvector( [configuration,]

+ document TEXT) RETURNS tsvector
+
+ Parses a document into tokens,
+ reduces the tokens to lexemes,
+ and returns a tsvector which lists the lexemes
+ together with their positions in the document.
+ For the best description of this process,
+ see the section on Parsing and Lexing
+ in the accompanying tsearch2 Guide.
+
+ strip(vector tsvector) RETURNS tsvector
+
+ Return a vector which lists the same lexemes
+ as the given vector,
+ but which lacks any information
+ about where in the document each lexeme appeared.
+ While the returned vector is thus useless for relevance ranking,
+ it will usually be much smaller.
+
+ setweight(vector tsvector, letter) RETURNS tsvector
+
+ This function returns a copy of the input vector
+ in which every location has been labelled
+ with either the letter
+ 'A', 'B', or 'C',
+ or the default label 'D'
+ (which is the default with which new vectors are created,
+ and as such is usually not displayed).
+ These labels are retained when vectors are concatenated,
+ allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+
+ vector1 || vector2
+
+ concat(vector1 tsvector, vector2 tsvector)

+ RETURNS tsvector
+
+ Returns a vector which combines the lexemes and position information
+ in the two vectors given as arguments.
+ Position weight labels (described in the previous paragraph)
+ are retained intact during the concatenation.
+ This has at least two uses.
+ First,
+ if some sections of your document
+ need to be parsed with different configurations than others,
+ you can parse them separately
+ and concatenate the resulting vectors into one.
+ Second,
+ you can weight words from some sections of your document
+ more heavily than those from others by:
+ parsing the sections into separate vectors;
+ assigning the vectors different position labels
+ with the setweight() function;
+ concatenating them into a single vector;
+ and then providing a weights argument
+ to the rank() function
+ that assigns different weights to positions with different labels.
+
+ tsvector_size(vector tsvector) RETURNS INT4
+
+ Returns the number of lexemes stored in the vector.
+
+ text::tsvector RETURNS tsvector
+
+ Directly casting text to a tsvector
+ allows you to directly inject lexemes into a vector,
+ with whatever positions and position weights you choose to specify.
+ The text should be formatted
+ like the vector would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Query Operations
+
+
+
+ to_tsquery( [configuration,]

+ querytext text) RETURNS tsquery
+
+ Parses a query,
+ which should be single words separated by the boolean operators
+ “&” and,
+ “|” or,
+ and “!” not,
+ which can be grouped using parentheses.
+ Each word is reduced to a lexeme using the current
+ or specified configuration.
+
+
+ querytree(query tsquery) RETURNS text
+
+ This might return a textual representation of the given query.
+
+ text::tsquery RETURNS tsquery
+
+ Directly casting text to a tsquery
+ allows you to directly inject lexemes into a query,
+ with whatever positions and position weight flags you choose to specify.
+ The text should be formatted
+ like the query would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Configurations
+
+A configuration specifies all of the equipment necessary
+to transform a document into a tsvector:
+the parser that breaks its text into tokens,
+and the dictionaries which then transform each token into a lexeme.
+Every call to to_tsvector() (described above)
+uses a configuration to perform its processing.
+Three configurations come with tsearch2:
+
+
+default — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the simple dictionary for all others.
+default_russian — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the ru_stem Russian Snowball dictionary for all others.
+simple — Processes both words and numbers
+ with the simple dictionary,
+ which neither discards any stop words nor alters them.
+
+
+The tsearch2 module initially chooses your current configuration
+by looking for your current locale in the locale field
+of the pg_ts_cfg table described below.
+You can manipulate the current configuration yourself with these functions:
+
+
+
+ set_curcfg( id INT | ts_name TEXT

+  ) RETURNS VOID
+
+ Set the current configuration used by to_tsvector
+ and to_tsquery.
+
+ show_curcfg() RETURNS INT4
+
+ Returns the integer id of the current configuration.
+
+
+
+Each configuration is defined by a record in the pg_ts_cfg table:
+
+create table pg_ts_cfg (
+   id      int not  null primary key,
+   ts_name     text not null,
+   prs_name    text not null,
+   locale      text
+);
+
+The id and ts_name are unique values
+which identify the configuration;
+the prs_name specifies which parser the configuration uses.
+Once this parser has split document text into tokens,
+the type of each resulting token —
+or, more specifically, the type's lex_alias
+as specified in the parser's lexem_type() table —
+is searched for together with the configuration's ts_name
+in the pg_ts_cfgmap table:
+
+create table pg_ts_cfgmap (
+   ts_name     text not null,
+   lex_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,lex_alias)
+);
+
+Those tokens whose types are not listed are discarded.
+The remaining tokens are assigned integer positions,
+starting with 1 for the first token in the document,
+and turned into lexemes with the help of the dictionaries
+whose names are given in the dict_name array for their type.
+These dictionaries are tried in order,
+stopping either with the first one to return a lexeme for the token,
+or discarding the token if no dictionary returns a lexeme for it.
+
+Parsers
+
+Each parser is defined by a record in the pg_ts_parser table:
+
+create table pg_ts_parser (
+   prs_id      int not null primary key,
+   prs_name    text not null,
+   prs_start   oid not null,
+   prs_getlexem    oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+);
+
+The prs_id and prs_name uniquely identify the parser,
+while prs_comment usually describes its name and version
+for the reference of users.
+The other items identify the low-level functions
+which make the parser operate,
+and are only of interest to someone writing a parser of their own.
+
+The tsearch2 module comes with one parser named default
+which is suitable for parsing most plain text and HTML documents.
+
+Each parser argument below
+must designate a parser with either an integer prs_id
+or a textual prs_name;
+the current parser is used when this argument is omitted.
+
+
+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID
+
+ Selects a current parser
+ which will be used when any of the following functions
+ are called without a parser as an argument.
+
+ CREATE FUNCTION lexem_type(

+  [ parser ]
+  ) RETURNS SETOF lexemtype
+
+ Returns a table which defines and describes
+ each kind of token the parser may produce as output.
+ For each token type the table gives the lexid
+ with which the parser will label each token of that type,
+ the alias which names the token type,
+ and a short description descr for the user to read.
+
+ CREATE FUNCTION parse(

+  [ parser, ] document TEXT
+  ) RETURNS SETOF lexemtype
+
+ Parses the given document and returns a series of records,
+ one for each token produced by parsing.
+ Each token includes a lexid giving its type
+ and a lexem which gives its content.
+
+
+Dictionaries
+
+Dictionaries take textual tokens as input,
+usually those produced by a parser,
+and return lexemes which are usually some reduced form of the token.
+Among the dictionaries which come installed with tsearch2 are:
+
+
+simple simply folds uppercase letters to lowercase
+ before returning the word.
+en_stem runs an English Snowball stemmer on each word
+ that attempts to reduce the various forms of a verb or noun
+ to a single recognizable form.
+ru_stem runs a Russian Snowball stemmer on each word.
+
+
+Each dictionary is defined by an entry in the pg_ts_dict table:
+
+CREATE TABLE pg_ts_dict (
+   dict_id     int not null primary key,
+   dict_name   text not null,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lemmatize  oid not null,
+   dict_comment    text
+);
+
+The dict_id and dict_name
+serve as unique identifiers for the dictionary.
+The meaning of the dict_initoption varies among dictionaries,
+but for the built-in Snowball dictionaries
+it specifies a file from which stop words should be read.
+The dict_comment is a human-readable description of the dictionary.
+The other fields are internal function identifiers
+useful only to developers trying to implement their own dictionaries.
+
+The argument named dictionary
+in each of the following functions
+should be either an integer dict_id or a textual dict_name
+identifying which dictionary should be used for the operation;
+if omitted then the current dictionary is used.
+
+
+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID
+
+ Selects a current dictionary for use by functions
+ that do not select a dictionary explicitly.
+
+ CREATE FUNCTION lexize(

+ [ dictionary, ] word text)
+ RETURNS TEXT[]
+
+ Reduces a single word to a lexeme.
+ Note that lexemes are arrays of zero or more strings,
+ since in some languages there might be several base words
+ from which an inflected form could arise.
+
+
+Ranking
+
+Ranking attempts to measure how relevant documents are to particular queries
+by inspecting the number of times each search word appears in the document,
+and whether different search terms occur near each other.
+Note that this information is only available in unstripped vectors —
+ranking functions will only return a useful result
+for a tsvector which still has position information!
+
+Both of these ranking functions
+take an integer normalization option
+that specifies whether a document's length should impact its rank.
+This is often desirable,
+since a hundred-word document with five instances of a search word
+is probably more relevant than a thousand-word document with five instances.
+The option can have the values:
+
+
+0 (the default) ignores document length.
+1 divides the rank by the logarithm of the length.
+2 divides the rank by the length itself.
+
+
+The two ranking functions currently available are:
+
+
+
+ CREATE FUNCTION rank(

+  [ weights float4[], ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS,
+ and offers the ability to weight word instances more heavily
+ depending on how you have classified them.
+ The weights specify how heavily to weight each category of word:
+ {D-weight, C-weight, B-weight, A-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(

+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or less.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+


diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b


--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | [email protected]
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | [email protected]
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 |  
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 |  
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 |  
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 |  
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+


diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496


--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+# Makefile template for a generated tsearch2 dictionary module.
+# The CFG_* tokens are placeholders; presumably config.sh replaces them
+# when it generates the dictionary directory (verify against config.sh).
+
+# Position of this directory in the source tree, set before the global
+# makefile (located through top_builddir) is pulled in.
+top_builddir = ../..
+subdir = contrib/CFG_DIR
+include $(top_builddir)/src/Makefile.global
+
+# What this module consists of: documentation, generated SQL script,
+# shared library name and the objects that go into it.
+DOCS = README.CFG_MODNAME
+DATA_built = dict_CFG_MODNAME.sql
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+
+# Link against the tsearch2 library archive; no extra preprocessor flags.
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+PG_CPPFLAGS =
+
+include $(top_srcdir)/contrib/contrib-global.mk


diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7


--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility aims to help people create dictionaries for the contrib/tsearch v2
+module. In particular, it has built-in support for Snowball stemmers.
+
+Programming API to tsearch2 dictionaries is described in tsearch v2 
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument for the -p option should be *the same* as the name
+      of the stemming function in stem.c (without _stem)
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample Portuguese words with their stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+        Note that the dictionary and the corresponding entry in the
+   tsearch configuration are now installed, and you may modify them using
+   plain SQL commands, for example, to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercases the input word and removes it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please, check Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for additional information about "Gendict tutorial" and dictionaries.
\ No newline at end of file


diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542


--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRL/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+dictname=
+stemmode=no
+verbose=no
+cfile=
+hfile=
+dir= 
+hasinit=no
+comment=
+prefix=
+
+while getopts n:c:C:h:d:p:vis opt
+do
+   case "$opt" in
+       v) verbose=yes;;
+       s) stemmode=yes;;
+       i) hasinit=yes;;
+       n) dictname="$OPTARG";;
+       c) cfile="$OPTARG";;
+       h) hfile="$OPTARG";;
+       d) dir="$OPTARG";;
+       C) comment="$OPTARG";;
+       p) prefix="$OPTARG";;
+       \?) usage;;
+   esac
+done
+
+[ ${#dictname} -eq 0 ] && usage
+
+dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'`
+
+if [ $stemmode = "yes" ] ; then 
+   [ ${#prefix} -eq 0 ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi 
+
+[ ${#dir}   -eq 0 ] && dir="dict_$dictname"
+
+if [ ${#comment} -eq 0 ]; then
+   comment=null
+else
+   comment="'$comment'"
+fi
+
+ofile=
+for f in $cfile
+do
+   f=` echo $f | sed 's#c$#o#'`
+   ofile="$ofile $f"
+done
+
+if [ $stemmode = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+if [ $verbose = "yes" ]; then
+   echo Dictname: "'"$dictname"'"
+   echo Snowball stemmer: $stemmode
+   echo Has init method: $hasinit
+   [ $stemmode = "yes" ] && echo Function prefix: $prefix 
+   echo Source files: $cfile
+   echo Header files: $hfile
+   echo Object files: $ofile
+   echo Comment: $comment
+   echo Directory: ../../$dir
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build directory...  '
+if [ ! -d ../../$dir ]; then
+   if ! mkdir ../../$dir ; then 
+       echo "Can't create directory ../../$dir"
+       exit 1
+   fi 
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+else
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+fi
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+   [ $verbose = "yes" ] && echo -n 'Copy source and header files...  '
+   if [ ${#cfile} -ne 0 ] ; then
+       if ! cp $cfile ../../$dir ; then 
+           echo "Cant cp all or one of files: $cfile"
+           exit 1
+       fi
+   fi
+   if [ ${#hfile} -ne 0 ] ; then 
+       if ! cp $hfile ../../$dir ; then 
+               echo "Cant cp all or one of files: $hfile"
+           exit 1
+       fi
+   fi
+   [ $verbose = "yes" ] && echo ok
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build sub-include header...  '
+echo -n > ../../$dir/subinclude.h 
+for i in $hfile
+do
+   echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+if  [ $stemmode = "yes" ] ; then 
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   [ $verbose = "yes" ] && echo -n 'Build dictinonary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else 
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi 
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n "Build README.$dictname...  "
+if  [ $stemmode = "yes" ] ; then
+   echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname
+else
+   echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname
+fi
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+


diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/* Per-dictionary state allocated and filled by the init method in this file. */
+typedef struct {
+   struct SN_env *z;   /* Snowball environment returned by the stemmer's create_env() */
+   StopList    stoplist;   /* stop-word list; filled by readstoplist() when an init argument is given */
+   int (*stem)(struct SN_env * z); /* stemmer entry point selected at init time */
+} DictSnowball;
+
+
+/*
+ * Init method of the generated Snowball dictionary.
+ *
+ * Argument 0 (optional text) is handed to readstoplist() to load the
+ * stop-word list.  Returns a pointer to a malloc'ed DictSnowball.
+ * The state is allocated with malloc(), so it must be released by hand
+ * on every error path that is reached after the allocation.
+ */
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+Datum 
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+       
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       freestoplist(&(d->stoplist));
+       /* release the state itself as well: elog(ERROR) does not return */
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+


diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+HASINIT typedef struct { /* per-dictionary state built by the init method */
+HASINIT    StopList    stoplist; /* stop words, consulted via searchstoplist() in the lexize method */
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) { /* init method: allocate the state, optionally load stop words from argument 0 */
+HASINIT    DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT 
+HASINIT    if ( !d ) /* malloc failed */
+HASINIT        elog(ERROR, "No memory");
+HASINIT    memset(d,0,sizeof(DictExample));
+HASINIT 
+HASINIT    d->stoplist.wordop=lowerstr; /* word transformation hook of the stop list */
+HASINIT    
+HASINIT    /* Your INIT code */
+HASINIT    
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) { /* an argument was supplied */
+HASINIT        text       *in = PG_GETARG_TEXT_P(0);
+HASINIT        readstoplist(in, &(d->stoplist));
+HASINIT        sortstoplist(&(d->stoplist));
+HASINIT        PG_FREE_IF_COPY(in, 0);
+HASINIT    }
+HASINIT 
+HASINIT    PG_RETURN_POINTER(d); /* the state is passed to the lexize method as argument 0 */
+HASINIT }
+
+/*
+ * Lexize method of the generated template dictionary.
+ *
+ * Argument 1 is the input word and argument 2 its length; the word is
+ * duplicated with pnstrdup() (presumably a NUL-terminated copy).
+ * Returns a palloc'ed, NULL-terminated array of lexemes.  In the variant
+ * generated with an init method, an empty word or a word found in the
+ * stop list yields an empty array; otherwise the array holds the copy.
+ */
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+   char    **res=palloc(sizeof(char*)*2);  /* one lexeme slot plus the terminator */
+
+   /* Your INIT dictionary code */
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0]=NULL;
+HASINIT    } else 
+       res[0]=txt;
+   res[1]=NULL;
+
+   PG_RETURN_POINTER(res);
+}


diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842


--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+-- Template of the SQL script that installs a generated dictionary.
+-- gendict/config.sh fills in the CFG_ placeholders and keeps or blanks the
+-- lines carrying a HASINIT/NOINIT/ISSNOWBALL/NOSNOWBALL prefix.
+SET search_path = public;
+BEGIN;
+
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+-- Register the dictionary: name, init function (or null), a null column,
+-- lexize function (snb_lexize for Snowball dictionaries) and the comment.
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;


diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/* Key (as GISTTYPE *) of the pos-th GISTENTRY stored in the varlena entry vector 'vec'. */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/* Number of set bits in one byte: the sum of its eight bits taken with GETBITBYTE. */
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/*
+ * Input function of the index key type (going by its name); the key has
+ * no external representation, so every call raises "Not implemented".
+ */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);     /* presumably only here to give the function a return value */
+}
+
+/*
+ * Output function of the index key type (going by its name); every call
+ * raises "Not implemented".
+ */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);     /* presumably only here to give the function a return value */
+}
+
+/*
+ * qsort() comparator: orders int4 values ascending.
+ */
+static int
+compareint(const void *a, const void *b)
+{
+   int4        lhs = *((int4 *) a);
+   int4        rhs = *((int4 *) b);
+
+   if (lhs < rhs)
+       return -1;
+   return (lhs > rhs) ? 1 : 0;
+}
+
+/*
+ * Sort the l-element array 'a' in place and squeeze out duplicates.
+ * Returns the number of distinct values left at the front of the array.
+ *
+ * Arrays with fewer than two elements are returned unchanged.  The empty
+ * case matters: the compaction loop below always counts the first slot,
+ * so without the guard an empty array would be reported as one element.
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+   int4       *ptr,
+              *res;
+
+   if (l <= 1)
+       return l;
+
+   ptr = res = a;
+
+   qsort((void *) a, l, sizeof(int4), compareint);
+
+   /* 'res' is the last kept value, 'ptr' scans the sorted input */
+   while (ptr - a < l)
+       if (*ptr != *res)
+           *(++res) = *ptr++;
+       else
+           ptr++;
+   return res + 1 - a;
+}
+
+/*
+ * Build the bit signature of an array key: clear 'sign', then set the
+ * bit selected by HASH for every element of a's array.
+ */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+   int4       *arr = GETARR(a);
+   int4        nelem = ARRNELEM(a);
+   int4        idx = 0;
+
+   MemSet((void *) sign, 0, sizeof(BITVEC));
+   while (idx < nelem)
+   {
+       /* HASH expands its value argument more than once: keep it side-effect free */
+       HASH(sign, arr[idx]);
+       idx++;
+   }
+}
+
+/*
+ * GiST compress method.
+ *
+ * Leaf keys (entry->leafkey set) arrive as a tsvector: every word is
+ * hashed with crc32_sz() into an int4 array key (ARRKEY), the array is
+ * sorted and de-duplicated, and if the resulting key is larger than
+ * TOAST_INDEX_TARGET it is replaced by a fixed-size bit signature
+ * (SIGNKEY).
+ *
+ * Other keys are returned unchanged, except that a signature whose bytes
+ * are all 0xff is replaced by the compact SIGNKEY | ALLISTRUE form.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       /* array key with one slot per word */
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* free the detoasted copy, if PG_DETOAST_DATUM made one */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /* any byte that is not 0xff: keep the key as it is */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       /* every bit is set: store the signature-less ALLISTRUE key */
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * GiST decompress method: detoast the key.  The incoming entry is
+ * returned as is when detoasting produced no new copy; otherwise a fresh
+ * GISTENTRY wrapping the detoasted key is returned.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *stored = (GISTTYPE *) DatumGetPointer(entry->key);
+   GISTTYPE   *plain = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+   GISTENTRY  *fresh;
+
+   if (plain == stored)
+       PG_RETURN_POINTER(entry);
+
+   fresh = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+   gistentryinit(*fresh, PointerGetDatum(plain),
+                 entry->rel, entry->page,
+                 entry->offset, plain->len, FALSE);
+
+   PG_RETURN_POINTER(fresh);
+}
+
+/* Half-open range [arrb, arre) over the sorted int4 array of an array key. */
+typedef struct
+{
+   int4       *arrb;
+   int4       *arre;
+}  CHKVAL;
+
+/*
+ * TS_execute callback for array keys: binary search for val->val in the
+ * sorted range described by the CHKVAL passed as 'checkval'.
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+   int4       *lo = ((CHKVAL *) checkval)->arrb;
+   int4       *hi = ((CHKVAL *) checkval)->arre;
+
+   /* the value, if present, always lies in [lo, hi) */
+   while (lo < hi)
+   {
+       int4       *mid = lo + (hi - lo) / 2;
+
+       if (*mid == val->val)
+           return (true);
+
+       if (*mid < val->val)
+           lo = mid + 1;
+       else
+           hi = mid;
+   }
+
+   return (false);
+}
+
+/*
+ * TS_execute callback for signature keys: reports whether the bit that
+ * val->val hashes to is set in the signature passed as 'checkval'.
+ */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+   unsigned int bitno = HASHVAL(val->val);
+
+   return GETBIT(checkval, bitno);
+}
+
+/*
+ * GiST consistent method: evaluate the query against an index key.
+ * An empty query matches nothing and an ALLISTRUE signature matches
+ * everything.  Other signatures are tested bit-wise, array keys by exact
+ * lookup; the flag passed to TS_execute differs (false for signatures,
+ * true for arrays).
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(entry->key);
+   CHKVAL      range;
+
+   if (!query->size)
+       PG_RETURN_BOOL(false);
+
+   if (!ISSIGNKEY(key))
+   {                           /* only leaf pages */
+       range.arrb = GETARR(key);
+       range.arre = range.arrb + ARRNELEM(key);
+       PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                                 (void *) &range, true,
+                                 checkcondition_arr));
+   }
+
+   if (ISALLTRUE(key))
+       PG_RETURN_BOOL(true);
+
+   PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                             (void *) GETSIGN(key), false,
+                             checkcondition_bit));
+}
+
+/*
+ * Merge key 'add' into the signature 'sbase'.
+ * Returns 1 when 'add' is an ALLISTRUE key (sbase is then left untouched),
+ * 0 otherwise.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+   int4        i;
+   BITVECP     sadd;
+
+   if (!ISSIGNKEY(add))
+   {
+       /* array key: hash every element into the signature */
+       int4       *elems = GETARR(add);
+
+       for (i = 0; i < ARRNELEM(add); i++)
+           HASH(sbase, elems[i]);
+       return 0;
+   }
+
+   if (ISALLTRUE(add))
+       return 1;
+
+   /* signature key: OR it in byte by byte */
+   sadd = GETSIGN(add);
+   LOOPBYTE(
+            sbase[i] |= sadd[i];
+   );
+   return 0;
+}
+
+
+/*
+ * GiST union method: build a signature key covering every key in the
+ * entry vector.  As soon as one ALLISTRUE key is met, the result is the
+ * compact ALLISTRUE key.  The size of the result is stored in *size.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   int        *size = (int *) PG_GETARG_POINTER(1);
+   int4        nentries = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+   int4        flag = SIGNKEY;
+   int4        keysize;
+   int4        i;
+   BITVEC      acc;
+   GISTTYPE   *result;
+
+   MemSet((void *) acc, 0, sizeof(BITVEC));
+   for (i = 0; i < nentries; i++)
+   {
+       if (unionkey(acc, GETENTRY(entryvec, i)))
+       {
+           flag |= ALLISTRUE;
+           break;
+       }
+   }
+
+   keysize = CALCGTSIZE(flag, 0);
+   result = (GISTTYPE *) palloc(keysize);
+   result->len = keysize;
+   *size = keysize;
+   result->flag = flag;
+   if (!ISALLTRUE(result))
+       memcpy((void *) GETSIGN(result), (void *) acc, sizeof(BITVEC));
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * GiST same method: store in *result whether keys a and b are equal.
+ * Signature keys are equal when both are ALLISTRUE or when neither is
+ * and all signature bytes match; array keys are equal when they have the
+ * same length and the same elements.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+   GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+   GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+   bool       *result = (bool *) PG_GETARG_POINTER(2);
+   int4        i;
+
+   if (!ISSIGNKEY(a))
+   {                           /* a and b ISARRKEY */
+       int4        lena = ARRNELEM(a),
+                   lenb = ARRNELEM(b);
+
+       *result = (lena == lenb) ? true : false;
+       if (*result)
+       {
+           int4       *ptra = GETARR(a),
+                      *ptrb = GETARR(b);
+
+           for (i = 0; i < lena; i++)
+           {
+               if (ptra[i] != ptrb[i])
+               {
+                   *result = false;
+                   break;
+               }
+           }
+       }
+   }
+   else if (ISALLTRUE(a) || ISALLTRUE(b))
+   {                           /* b also ISSIGNKEY; equal only if both are ALLISTRUE */
+       *result = (ISALLTRUE(a) && ISALLTRUE(b)) ? true : false;
+   }
+   else
+   {
+       BITVECP     sa = GETSIGN(a),
+                   sb = GETSIGN(b);
+
+       *result = true;
+       LOOPBYTE(
+                if (sa[i] != sb[i])
+                {
+                    *result = false;
+                    break;
+                }
+       );
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * Number of set bits in a signature.
+ */
+static int4
+sizebitvec(BITVECP sign)
+{
+   int4        i;
+   int4        nbits = 0;
+
+   LOOPBYTE(
+       nbits += SUMBIT(sign[i]);
+   );
+   return nbits;
+}
+
+/*
+ * Hamming distance between two signatures: the number of bit positions
+ * in which they differ.
+ */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+   int i;
+   int differing = 0;
+
+   LOOPBIT(
+       differing += ( GETBIT(a,i) != GETBIT(b,i) ) ? 1 : 0;
+   );
+   return differing;
+}
+
+/*
+ * Hamming distance between two signature keys.  An ALLISTRUE key stands
+ * for a signature with every bit set, so its distance to an ordinary
+ * signature is the number of unset bits in that signature.
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+   if ( ISALLTRUE(a) && ISALLTRUE(b) )
+       return 0;
+   if ( ISALLTRUE(a) )
+       return SIGLENBIT-sizebitvec(GETSIGN(b));
+   if ( ISALLTRUE(b) )
+       return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+   return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * GiST penalty method: cost of adding newentry under origentry, stored
+ * in *penalty.  An array key is first turned into a signature; the
+ * penalty is then a Hamming distance, except that against an ALLISTRUE
+ * original it is the share of unset bits scaled by SIGLENBIT+1.
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+   GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+   float      *penalty = (float *) PG_GETARG_POINTER(2);
+   GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+   GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+   BITVEC      newsign;
+
+   *penalty = 0.0;
+
+   if (!ISARRKEY(newval))
+   {
+       *penalty = hemdist(origval, newval);
+       PG_RETURN_POINTER(penalty);
+   }
+
+   makesign(newsign, newval);
+   if (ISALLTRUE(origval))
+       *penalty = ((float) (SIGLENBIT - sizebitvec(newsign))) / (float) (SIGLENBIT + 1);
+   else
+       *penalty = hemdistsign(newsign, GETSIGN(origval));
+
+   PG_RETURN_POINTER(penalty);
+}
+
+/* Signature of one key in expanded form, as used while splitting a page. */
+typedef struct
+{
+   bool        allistrue;
+   BITVEC      sign;
+}  CACHESIGN;
+
+/*
+ * Fill 'item' from 'key': array keys are hashed into a signature,
+ * ALLISTRUE keys only set the flag (sign is left unset), and ordinary
+ * signatures are copied.
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+   if (ISARRKEY(key))
+   {
+       item->allistrue = false;
+       makesign(item->sign, key);
+       return;
+   }
+
+   item->allistrue = ISALLTRUE(key) ? true : false;
+   if (!item->allistrue)
+       memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+}
+
+/* Balancing term: minus the cube of (a - b), scaled by c. */
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+/* One entry of the split ordering: its offset and its cost. */
+typedef struct
+{
+   OffsetNumber pos;
+   int4        cost;
+} SPLITCOST;
+
+/*
+ * qsort() comparator: orders SPLITCOST items by ascending cost.
+ */
+static int
+comparecost(const void *a, const void *b)
+{
+   int4        lhs = ((SPLITCOST *) a)->cost;
+   int4        rhs = ((SPLITCOST *) b)->cost;
+
+   if (lhs < rhs)
+       return -1;
+   return (lhs > rhs) ? 1 : 0;
+}
+
+
+/*
+ * Hamming distance between two cached signatures; an all-true item is
+ * treated as a signature with every bit set.
+ */
+static int
+hemdistcache(CACHESIGN   *a, CACHESIGN   *b) {
+   if ( a->allistrue && b->allistrue )
+       return 0;
+   if ( a->allistrue )
+       return SIGLENBIT-sizebitvec(b->sign);
+   if ( b->allistrue )
+       return SIGLENBIT-sizebitvec(a->sign);
+
+   return hemdistsign( a->sign, b->sign );
+}
+
+/*
+ * GiST picksplit method.
+ *
+ * 1. Expand every key into a CACHESIGN and pick as seeds the two entries
+ *    with the greatest Hamming distance between them.
+ * 2. Start the left/right union keys from the two seeds.
+ * 3. Visit the entries in ascending order of |dist(seed_1) - dist(seed_2)|
+ *    and put each one on the side whose union it is closer to; the WISH_F
+ *    term shifts the decision away from the side that already holds more
+ *    entries.
+ *
+ * Fills v->spl_left/spl_right (terminated with FirstOffsetNumber),
+ * v->spl_nleft/spl_nright and v->spl_ldatum/spl_rdatum, and returns v.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+   OffsetNumber k,
+               j;
+   GISTTYPE   *datum_l,
+              *datum_r;
+   BITVECP     union_l,
+               union_r;
+   int4        size_alpha,
+               size_beta;
+   int4        size_waste,
+               waste = -1;
+   int4        nbytes;
+   OffsetNumber seed_1 = 0,
+               seed_2 = 0;
+   OffsetNumber *left,
+              *right;
+   OffsetNumber maxoff;
+   BITVECP     ptr;
+   int         i;
+   CACHESIGN  *cache;
+   SPLITCOST  *costvector;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+   cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+   fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+   /* seed selection: the most distant pair; the cache is filled on the first pass */
+   for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+       for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+           if (k == FirstOffsetNumber)
+               fillcache(&cache[j], GETENTRY(entryvec, j));
+
+           size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+           if (size_waste > waste) {
+               waste = size_waste;
+               seed_1 = k;
+               seed_2 = j;
+           }
+       }
+   }
+
+   left = v->spl_left;
+   v->spl_nleft = 0;
+   right = v->spl_right;
+   v->spl_nright = 0;
+
+   /* no pair was examined: fall back to the first two entries */
+   if (seed_1 == 0 || seed_2 == 0) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
+   /* form initial .. */
+   if (cache[seed_1].allistrue) {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_l->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_l->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+   }
+   if (cache[seed_2].allistrue) {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_r->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_r->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+   }
+
+   union_l=GETSIGN(datum_l);
+   union_r=GETSIGN(datum_r);
+   maxoff = OffsetNumberNext(maxoff);
+   fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+   /* sort before ... */
+   costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+   for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+       costvector[j - 1].pos = j;
+       size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+       size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+       costvector[j - 1].cost = abs(size_alpha - size_beta);
+   }
+   qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+   for (k = 0; k < maxoff; k++) {
+       j = costvector[k].pos;
+       /* the seeds go to their own side unconditionally */
+       if (j == seed_1) {
+           *left++ = j;
+           v->spl_nleft++;
+           continue;
+       } else if (j == seed_2) {
+           *right++ = j;
+           v->spl_nright++;
+           continue;
+       }
+
+       /*
+        * Distance of entry j to the left union.  cache[j].sign is a bare
+        * BITVEC, not a GISTTYPE: it must be passed as is.  Applying
+        * GETSIGN to it would skip GTHDRSIZE bytes and read past the end
+        * of the signature.
+        */
+       if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+               size_alpha=0;
+           else
+               size_alpha = SIGLENBIT-sizebitvec(  
+                   ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign  
+               );
+       } else {
+           size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+       }
+
+       /* same for the right union */
+       if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+               size_beta=0;
+           else
+               size_beta = SIGLENBIT-sizebitvec(  
+                   ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign  
+               );
+       } else {
+           size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+       }
+
+       if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+           /* goes left: widen the left union */
+           if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_l) )
+                   MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_l[i] |= ptr[i];
+               );
+           }
+           *left++ = j;
+           v->spl_nleft++;
+       } else {
+           /* goes right: widen the right union */
+           if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_r) )
+                   MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_r[i] |= ptr[i];
+               );
+           }
+           *right++ = j;
+           v->spl_nright++;
+       }
+   }
+
+   /* terminate both offset lists */
+   *right = *left = FirstOffsetNumber;
+   pfree(costvector);
+   pfree(cache);
+   v->spl_ldatum = PointerGetDatum(datum_l);
+   v->spl_rdatum = PointerGetDatum(datum_r);
+
+   PG_RETURN_POINTER(v);
+}


diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+#define BITBYTE 8
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+#define LOOPBYTE(a) \
+       for(i=0;i
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i
+                               a;\
+               }
+
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+#define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ */
+typedef struct
+{
+   int4        len;
+   int4        flag;
+   char        data[1];
+}  GISTTYPE;
+
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+#define ISARRKEY(x) ( ((GISTTYPE*)x)->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)x)->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)x)->flag & ALLISTRUE )
+
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+#define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)x+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)x)->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "spell.h"
+
+#define MAXNORMLEN 56
+
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/* qsort comparator: orders SPELL entries by their word using strcmp() */
+static int cmpspell(const void *s1,const void *s2){
+   const SPELL *left = (const SPELL *) s1;
+   const SPELL *right = (const SPELL *) s2;
+
+   return strcmp(left->word, right->word);
+}
+
+/* Convert a NUL-terminated string to lower case in place, byte by byte. */
+static void
+strlower( char * str ) {
+   unsigned char *p;
+
+   for (p = (unsigned char *) str; *p != '\0'; p++)
+       *p = tolower( *p );
+}
+
+/*
+ * Backward string compare for suffix tree operations: the strings are
+ * compared character by character starting from their last characters.
+ * Returns -1, 0 or 1.  If one string runs out first (it is a tail of the
+ * other), the shorter string sorts first.
+ */
+static int
+strbcmp(const char *s1, const char *s2) {
+   int i = (int) strlen(s1) - 1;
+   int j = (int) strlen(s2) - 1;
+
+   for (; i >= 0 && j >= 0; i--, j--) {
+       if (s1[i] != s2[j])
+           return (s1[i] < s2[j]) ? -1 : 1;
+   }
+   /* at least one string is exhausted; whichever has characters left is greater */
+   if (i == j)
+       return 0;
+   return (i < j) ? -1 : 1;
+}
+/*
+ * Like strbcmp(), but compares at most `count` characters counted from the
+ * ends of the strings.  Returns 0 as soon as `count` characters matched.
+ */
+static int
+strbncmp(const char *s1, const char *s2, size_t count) {
+   int i = (int) strlen(s1) - 1;
+   int j = (int) strlen(s2) - 1;
+   int left = count;
+
+   for (; i >= 0 && j >= 0 && left > 0; i--, j--, left--) {
+       if (s1[i] != s2[j])
+           return (s1[i] < s2[j]) ? -1 : 1;
+   }
+   /* either the whole budget matched, or both strings ended together */
+   if (left == 0 || i == j)
+       return 0;
+   return (i < j) ? -1 : 1;
+}
+
+/*
+ * qsort comparator for AFFIX entries.  Entries are ordered by type first;
+ * within one type, prefix rules ('p') compare repl with strcmp() and all
+ * other rules compare repl backwards with strbcmp().
+ */
+static int
+cmpaffix(const void *s1,const void *s2){
+   const AFFIX *a = (const AFFIX *) s1;
+   const AFFIX *b = (const AFFIX *) s2;
+
+   if (a->type != b->type)
+       return (a->type < b->type) ? -1 : 1;
+
+   if (a->type == 'p')
+       return strcmp(a->repl, b->repl);
+   return strbcmp(a->repl, b->repl);
+}
+
+/*
+ * Append one word and its affix flags to Conf->Spell.
+ *
+ * The array grows in steps of 1024*20 entries using malloc/realloc; the
+ * word is copied with strdup().  At most sizeof(flag)-1 flag characters are
+ * kept.  Out-of-memory is reported with elog(ERROR).  Always returns 0.
+ */
+int
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   SPELL *entry;
+
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell");
+   }
+   entry = &Conf->Spell[Conf->nspell];
+   entry->word=strdup(word);
+   if ( !entry->word )
+       elog(ERROR,"No memory for AddSpell");
+   /*
+    * strncpy() does not terminate the destination when the source fills
+    * it.  FindWord() scans flag with strchr(), so always terminate it.
+    */
+   strncpy(entry->flag,flag,sizeof(entry->flag)-1);
+   entry->flag[sizeof(entry->flag)-1]='\0';
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * Read a dictionary file and pass every line to AddSpell().
+ *
+ * A line is either "word" or "word/flags".  The flags part is cut at the
+ * first character that is not an ASCII letter; a line without '/' gets an
+ * empty flag string.  The word is lower-cased before it is stored.
+ * Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportDictionary(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];  
+   FILE *dict;
+
+   if(!(dict=fopen(filename,"r")))return(1);
+   while(fgets(str,sizeof(str),dict)){
+       unsigned char *s;
+       const unsigned char *flag;
+
+           flag = NULL;
+       if((s=strchr(str,'/'))){
+           /* split at '/': str keeps the word, flag points at the flags */
+           *s=0;
+           s++;flag=s;
+           /* terminate the flags at the first non-letter */
+           while(*s){
+               if (((*s>='A')&&(*s<='Z'))||((*s>='a')&&(*s<='z')))
+                   s++;
+               else {
+                   *s=0;
+                   break;
+               }
+           }
+       }else{
+           flag="";
+       }
+       /* only the word is lower-cased: the flags lie behind the NUL put at '/' */
+       strlower(str);
+       /*
+        * Replace CR and LF in the word with NUL, which ends the word at the
+        * first line terminator.
+        * NOTE(review): the previous comment here described skipping words by
+        * first letter; no such filtering is done by this code.
+        */
+       s=str;
+       while(*s){
+           if(*s=='\r')*s=0;
+           if(*s=='\n')*s=0;
+           s++;
+       }
+       AddSpell(Conf,str,flag);
+   }
+   fclose(dict);
+   return(0);
+}
+
+
+/*
+ * Look up `word` in the sorted Conf->Spell array.
+ *
+ * The search is limited to the index range recorded in Conf->SpellTree for
+ * the first byte of the word.  If affixflag is non-zero, an entry only
+ * counts as a hit when its flag string contains that character.
+ * Returns a pointer to the matching entry, or NULL.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int l,c,r,resc,resl,resr, i;
+
+   i = (int)(*word) & 255;
+   l = Conf->SpellTree.Left[i];
+   r = Conf->SpellTree.Right[i];
+   /* -1 in Left means no word starts with this byte */
+   if (l == -1) return (NULL);
+   while(l<=r){
+       /* probe the middle and both ends of the current range */
+       c = (l + r) >> 1;
+       resc = strcmp(Conf->Spell[c].word, word);
+       if( (resc == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[c]);
+       }
+       resl = strcmp(Conf->Spell[l].word, word);
+       if( (resl == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[l]);
+       }
+       resr = strcmp(Conf->Spell[r].word, word);
+       if( (resr == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[r]);
+       }
+       /*
+        * Halve the range on the side indicated by the middle probe and drop
+        * the end that was just examined.  When the middle word is equal but
+        * its flags did not match, only shrink by one at each end, so equal
+        * words with different flags on either side are still visited.
+        */
+       if(resc < 0){
+           l = c + 1;
+           r--;
+       } else if(resc > 0){
+           r = c - 1;
+           l++;
+       } else {
+           l++;
+           r--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * Append one affix rule to Conf->Affix.
+ *
+ * flag  - affix flag character the rule belongs to
+ * mask  - regular expression the candidate word must match; it is stored
+ *         anchored with '$' at the end for suffixes (type 's') and with
+ *         '^' at the start otherwise
+ * find, repl - the two strings of the rule, stored as given
+ * type  - 's' for a suffix rule, anything else is treated as a prefix rule
+ *
+ * The array grows in steps of 16 entries using malloc/realloc.  Strings
+ * that do not fit the fixed-size AFFIX fields, and out-of-memory, are
+ * reported with elog(ERROR).  Always returns 0.
+ */
+int
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   AFFIX *aff;
+
+   /*
+    * The fields of AFFIX are fixed-size arrays and the input comes from a
+    * file, so check the lengths before copying.  mask needs room for one
+    * anchor character plus the terminating NUL.
+    */
+   if (strlen(mask) + 2 > sizeof(aff->mask) ||
+       strlen(find) + 1 > sizeof(aff->find) ||
+       strlen(repl) + 1 > sizeof(aff->repl))
+       elog(ERROR,"Affix parameter is too long");
+
+   if(Conf->naffixes>=Conf->maffixes){
+       if(Conf->maffixes){
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }else{
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL )
+           elog(ERROR,"No memory for AddAffix");
+   }
+   aff = &Conf->Affix[Conf->naffixes];
+   if (type=='s') {
+       sprintf(aff->mask,"%s$",mask);
+   } else {
+       sprintf(aff->mask,"^%s",mask);
+   }
+   /* compile == 1 means the mask has not been passed to regcomp() yet */
+   aff->compile = 1;
+   aff->flag=flag;
+   aff->type=type;
+
+   strcpy(aff->find,find);
+   strcpy(aff->repl,repl);
+   aff->replen=strlen(repl);
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy src to dist, dropping every space, '-' and tab character.
+ * dist is NUL-terminated and returned.
+ */
+static char *
+remove_spaces(char *dist,char *src){
+   char *out = dist;
+   char *in;
+
+   for (in = src; *in != '\0'; in++) {
+       switch (*in) {
+           case ' ':
+           case '-':
+           case '\t':
+               break;          /* dropped */
+           default:
+               *out++ = *in;
+               break;
+       }
+   }
+   *out = '\0';
+   return dist;
+}
+
+
+/*
+ * Read an affix file and register every rule with AddAffix().
+ *
+ * Recognized lines:
+ *   "suffixes..." / "prefixes..."  switch the current section
+ *   "flag X"                       set the flag for following rules
+ *                                  ('*' and blanks before X are skipped)
+ *   "mask > find , repl"           a rule; text after '#' is ignored
+ * Rule lines are lower-cased, and spaces, tabs and '-' are removed from
+ * each of the three parts.  Lines before the first section header are
+ * ignored.  Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           /*
+            * strchr() also matches the terminating NUL of its first
+            * argument, so test for the end of the line explicitly;
+            * otherwise a line such as "flag *" without a newline would
+            * send s past the end of str.
+            */
+           while(*s && strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* str is no longer needed as input; reuse it as scratch space */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               /* "mask > text" with no comma: the text is the repl part */
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+
+   }
+   fclose(affix);
+
+   return(0);
+}
+
+/*
+ * Sort Conf->Spell by word and rebuild Conf->SpellTree: for every first
+ * byte value, Left/Right receive the first and last array index of the
+ * words starting with that byte.  Left is -1 for bytes no word starts with.
+ */
+void
+SortDictionary(IspellDict * Conf){
+   size_t idx;
+   int prev = -1;
+
+   qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+   for (idx = 0; idx < 256; idx++)
+       Conf->SpellTree.Left[idx] = -1;
+
+   for (idx = 0; idx < Conf->nspell; idx++) {
+       int first = (int)(Conf->Spell[idx].word[0]) & 255;
+
+       /* a new first byte starts here */
+       if (first != prev) {
+           Conf->SpellTree.Left[first] = idx;
+           prev = first;
+       }
+       /* keeps moving forward until the run for this byte ends */
+       Conf->SpellTree.Right[first] = idx;
+   }
+}
+
+/*
+ * Sort Conf->Affix with cmpaffix and rebuild the prefix and suffix index
+ * ranges.  Prefix rules are indexed by the first byte of repl, suffix rules
+ * by the last byte of repl (0 when repl is empty).  Left/Right hold the
+ * first and last array index for each byte value, or -1 if there is none.
+ */
+void 
+SortAffixes(IspellDict * Conf) {
+  int   CurLetP = -1, CurLetS = -1, Let;
+  AFFIX *Affix; size_t i;
+  
+  if (Conf->naffixes > 1)
+    qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
+  for(i = 0; i < 256; i++) {
+      Conf->PrefixTree.Left[i] = Conf->PrefixTree.Right[i] = -1;
+      Conf->SuffixTree.Left[i] = Conf->SuffixTree.Right[i] = -1;
+  }
+
+  for(i = 0; i < Conf->naffixes; i++) {
+    Affix = &(((AFFIX*)Conf->Affix)[i]);
+    if(Affix->type == 'p') {
+      /* prefix rule: keyed by the first byte of repl */
+      Let = (int)(*(Affix->repl)) & 255;
+      if (CurLetP != Let) {
+   Conf->PrefixTree.Left[Let] = i;
+   CurLetP = Let;
+      }
+      Conf->PrefixTree.Right[Let] = i;
+    } else {
+      /* suffix rule: keyed by the last byte of repl, 0 for an empty repl */
+      Let = (Affix->replen) ? (int)(Affix->repl[Affix->replen-1]) & 255 : 0;
+      if (CurLetS != Let) {
+   Conf->SuffixTree.Left[Let] = i;
+   CurLetS = Let;
+      }
+      Conf->SuffixTree.Right[Let] = i;
+    }
+  }
+}
+
+/*
+ * Try to undo one suffix rule on `word`.
+ *
+ * *res receives the result of comparing the tail of word with Affix->repl.
+ * If the tail matches, the last replen characters are replaced by
+ * Affix->find; the candidate must then match the rule's regular expression
+ * and be found in the dictionary with the rule's flag.
+ * Returns a pstrdup'ed copy of the candidate, or NULL.
+ */
+static char * 
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) {
+  regmatch_t subs[2]; /* workaround for apache&linux */
+  char newword[2*MAXNORMLEN] = "";
+  int err;
+  
+  *res = strbncmp(word, Affix->repl, Affix->replen);
+  if (*res < 0) {
+    return NULL;
+  }
+  if (*res > 0) {
+    return NULL;
+  }
+  /* tail matched, so len >= replen here: swap repl for find */
+  strcpy(newword, word);
+  strcpy(newword+len-Affix->replen, Affix->find);
+
+  /* the mask is compiled on first use; compile is cleared once that succeeded */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      /* NOTE(review): regfree() is called on a regex_t whose regcomp() failed - confirm this is safe */
+      regfree(&(Affix->reg));
+      return(NULL);
+    }
+    Affix->compile = 0;
+  }
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    if(FindWord(Conf, newword, Affix->flag))
+   return pstrdup(newword);    
+  }
+  return NULL;
+}
+
+#define NS 1
+#define MAX_NORM 512
+/*
+ * Try to undo one prefix rule on `word` and append resulting normal forms.
+ *
+ * forms is the start of the result array and *cur its current write
+ * position; the array is kept NULL-terminated and holds at most MAX_NORM-1
+ * entries.  pi selects the SuffixTree slot whose first rule is additionally
+ * tried on the de-prefixed word.
+ * Returns the strncmp() result of word against Affix->repl when the prefix
+ * does not match (the caller uses its sign to steer its search), else 0.
+ */
+static int 
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur ) {
+  regmatch_t subs[NS*2];
+  char newword[2*MAXNORMLEN] = "";
+  int err, ls, res, lres;
+  size_t newlen;
+  AFFIX *CAffix = Conf->Affix;
+  
+  res = strncmp(word, Affix->repl, Affix->replen);
+  if (res != 0) {
+    return res;
+  }
+  /* candidate = find + (word without its first replen characters) */
+  strcpy(newword, Affix->find);
+  strcat(newword, word+Affix->replen);
+
+  /* the mask is compiled on first use; compile is cleared once that succeeded */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return (0);
+    }
+    Affix->compile = 0;
+  }
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    SPELL * curspell;
+
+    /* the de-prefixed word itself is a dictionary word with this flag */
+    if((curspell=FindWord(Conf, newword, Affix->flag))){
+      if ((*cur - forms) < (MAX_NORM-1)) {
+   **cur =  pstrdup(newword);
+   (*cur)++; **cur = NULL;
+      }
+    } 
+    /* also try the first suffix rule of slot pi on the de-prefixed word */
+    newlen = strlen(newword);
+    ls = Conf->SuffixTree.Left[pi];
+      if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
+   **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
+   if (**cur) {
+     (*cur)++; **cur = NULL;
+   }
+      }
+  }
+  return 0;
+}
+
+
+/*
+ * Return the normal (dictionary) forms of `word`.
+ *
+ * word is lower-cased in place.  The result is a palloc'ed, NULL-terminated
+ * array of pstrdup'ed strings with at most MAX_NORM-1 entries, or NULL if
+ * the word is empty, longer than MAXNORMLEN, or has no normal form.
+ */
+char **
+NormalizeWord(IspellDict * Conf,char *word){
+   size_t len;
+   char ** forms;
+   char **cur;
+   AFFIX * Affix;
+   int ri, pi, ipi, lp, rp, cp, ls, rs;
+   int lres, rres, cres = 0;
+   SPELL *spell;
+
+   len=strlen(word);
+   /*
+    * An empty word has no last character: word[len-1] would read before
+    * the buffer, and a zero pi would keep the "ipi += pi" loop below from
+    * ever terminating.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return(NULL);
+
+   strlower(word);
+
+   forms=(char **) palloc(MAX_NORM*sizeof(char *));
+   cur=forms;*cur=NULL;
+
+   /* first and last byte of the word index the prefix and suffix trees */
+   ri = (int)(*word) & 255;
+   pi = (int)(word[len-1]) & 255;
+   Affix=(AFFIX*)Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if((spell = FindWord(Conf, word, 0))){
+       *cur=pstrdup(word);
+       cur++;*cur=NULL;
+   }
+
+   /* Find all other NORMAL forms of the 'word' */
+
+   /* two passes: ipi == 0 (suffix rules with empty repl) and ipi == pi */
+   for (ipi = 0; ipi <= pi; ipi += pi) {
+
+       /* check prefix */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp) {
+           /* probe the middle and both ends, then narrow by the middle result */
+           cp = (lp + rp) >> 1;
+           cres = 0;
+           if ((cur - forms) < (MAX_NORM-1)) {
+               cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+           }
+           if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+           }
+           if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+           }
+           if (cres < 0) {
+               rp = cp - 1;
+               lp++;
+           } else if (cres > 0) {
+               lp = cp + 1;
+               rp--;
+           } else {
+               lp++;
+               rp--;
+           }
+       }
+
+       /* check suffix: walk the slot's range from both ends towards the middle */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       while (ls >= 0 && ls <= rs) {
+           if (  ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           ls++;
+           rs--;
+       } /* end while */
+
+   } /* for ipi */
+
+   if(cur==forms){
+       pfree(forms);
+       return(NULL);
+   }
+   return(forms);
+}
+
+/*
+ * Release everything owned by Conf: compiled regular expressions, the
+ * strdup'ed words, and the Affix and Spell arrays.  Conf itself is zeroed
+ * afterwards, not freed.
+ */
+void
+FreeIspell (IspellDict *Conf) {
+  int i;
+  AFFIX *Affix = (AFFIX *)Conf->Affix;
+
+  for (i = 0; i < Conf->naffixes; i++) {
+    /* compile == 0 means regcomp() succeeded for this rule */
+    if (Affix[i].compile == 0) {
+      regfree(&(Affix[i].reg));
+    }
+  }
+  /* the words belong to the Spell array, so iterate over nspell, not naffixes */
+  for (i = 0; i < Conf->nspell; i++) {
+    free( Conf->Spell[i].word );
+  }
+  free(Conf->Affix);
+  free(Conf->Spell);
+  memset( (void*)Conf, 0, sizeof(IspellDict) );
+  return;
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include 
+#include 
+
+/* one dictionary entry */
+typedef struct spell_struct {
+        char * word; 
+        char flag[10];   /* affix flag characters allowed for the word */
+} SPELL;
+
+/* one affix rule */
+typedef struct aff_struct {   
+        char flag;       /* affix flag character the rule belongs to */
+        char type;       /* rule kind, e.g. 'p' for prefix */
+        char mask[33];   /* regular expression source */
+        char find[16];
+        char repl[16];
+        regex_t reg;     /* compiled form of mask */
+        size_t replen;   /* length of repl */
+        char compile;    /* non-zero while reg still has to be compiled */
+} AFFIX;
+
+/* per byte value (0..255): a Left and a Right array index */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+/* a dictionary: affix rules, words, and byte-indexed lookup ranges */
+typedef struct {
+   int maffixes;            /* allocated size of Affix */
+   int naffixes;            /* used entries of Affix */
+   AFFIX * Affix;
+
+   int nspell;              /* used entries of Spell */
+   int mspell;              /* allocated size of Spell */
+   SPELL   *Spell;
+   Tree_struct SpellTree;
+   Tree_struct PrefixTree;
+   Tree_struct SuffixTree;
+
+} IspellDict;
+
+char ** NormalizeWord(IspellDict * Conf,char *word);
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+/* states of the parse_cfgdict() state machine */
+#define CS_WAITKEY 0           /* skipping blanks before a key */
+#define CS_INKEY   1           /* inside a key */
+#define CS_WAITEQ  2           /* key ended, waiting for '=' */
+#define CS_WAITVALUE   3       /* after '=', waiting for the value */
+#define CS_INVALUE 4           /* inside a double-quoted value */
+#define CS_IN2VALUE    5       /* inside an unquoted value */
+#define CS_WAITDELIM   6       /* value ended, waiting for ',' */
+#define CS_INESC   7           /* after a backslash; returns to CS_INVALUE */
+#define CS_IN2ESC  8           /* after a backslash; returns to CS_IN2VALUE */
+
+/*
+ * Return a palloc'ed, NUL-terminated copy of the first len bytes of ptr
+ * with backslash escapes resolved: a backslash is dropped and the character
+ * after it is kept as is.  A backslash at the very end is simply dropped.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+   char *res=palloc(len+1), *cptr;
+   memcpy(res,ptr,len);
+   res[len]='\0';
+   /* unescape in place: ptr reads, cptr writes, both within res */
+   cptr = ptr = res;
+   while(*ptr) {
+       if ( *ptr == '\\' ) {
+           ptr++;
+           /* dangling backslash: stop here instead of stepping past the terminator */
+           if ( *ptr == '\0' )
+               break;
+       }
+       *cptr=*ptr; ptr++; cptr++;
+   }
+   *cptr='\0';
+
+   return res;
+}
+
+/*
+ * Parse a configuration string of the form
+ *     key = value , key = "quoted value" , ...
+ * into a palloc'ed array of Map entries stored in *m.
+ *
+ * Keys consist of alphabetic characters.  A value is either double-quoted
+ * or runs up to the next blank or comma; a backslash escapes the following
+ * character in both kinds of value.  The array is zero-filled and has room
+ * for one entry more than the number of pairs, so it ends with an all-zero
+ * entry.  Syntax errors are reported with elog(ERROR).
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+   Map *mptr;
+   char *ptr=VARDATA(in), *begin=NULL;
+   int num=0;      /* int, so that long inputs cannot overflow the counter */
+   int state=CS_WAITKEY;
+
+   /* every pair but the last is followed by a comma, so commas bound the pair count */
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if ( *ptr==',' ) num++;
+       ptr++;
+   }
+
+   *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+   memset(mptr, 0, sizeof(Map)*(num+2) );
+   ptr=VARDATA(in);
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       /* the ctype functions need an unsigned char value */
+       unsigned char c = (unsigned char) *ptr;
+
+       if (state==CS_WAITKEY) {
+           if (isalpha(c)) {
+               begin=ptr;
+               state=CS_INKEY;
+           } else if ( !isspace(c) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int)(ptr-VARDATA(in)), *ptr);
+       } else if (state==CS_INKEY) {
+           if ( isspace(c) ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITEQ;
+           } else if ( *ptr=='=' ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITVALUE;
+           } else if ( !isalpha(c) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int)(ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITEQ ) {
+           if ( *ptr=='=' )
+               state=CS_WAITVALUE;
+           else if ( !isspace(c) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int)(ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITVALUE ) {
+           if ( *ptr=='"' ) {
+               begin=ptr+1;
+               state=CS_INVALUE;
+           } else if ( !isspace(c) ) {
+               begin=ptr;
+               state=CS_IN2VALUE;
+           }
+       } else if ( state==CS_INVALUE ) {
+           if ( *ptr=='"' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_INESC;
+       } else if ( state==CS_IN2VALUE ) {
+           if ( isspace(c) || *ptr==',' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               /*
+                * Use the escape state that returns to the unquoted value;
+                * CS_INESC would continue in quoted mode and then wait for
+                * a closing '"' that never comes.
+                */
+               state=CS_IN2ESC;
+       } else if ( state==CS_WAITDELIM ) {
+           if ( *ptr==',' )
+               state=CS_WAITKEY;
+           else if ( !isspace(c) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int)(ptr-VARDATA(in)), *ptr);
+       } else if ( state == CS_INESC ) {
+           /* the escaped character is skipped here; nstrdup() drops the backslash */
+           state=CS_INVALUE;
+       } else if ( state == CS_IN2ESC ) {
+           state=CS_IN2VALUE;
+       } else
+           elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int)(ptr-VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may simply end with the input */
+   if (state==CS_IN2VALUE) {
+       mptr->value = nstrdup(begin, ptr-begin);
+       mptr++;
+   } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) )
+       elog(ERROR,"Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery is parsed with the text parser
+ * and morphology is applied as well.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored flat. This means that in the array of nodes
+ * the right child is always the next item and the left child is at item+item->left
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR   2
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ * (pushquery() links the nodes into a list, newest first)
+ */
+typedef struct NODE
+{
+   int2        weight;         /* weight mask given after ':' in the operand */
+   int2        type;           /* token type, e.g. VAL or OPR */
+   int4        val;            /* operator character, or CRC32 of the operand */
+   int2        distance;       /* offset of the operand text in the operand buffer */
+   int2        length;         /* length of the operand text */
+   struct NODE *next;          /* node pushed just before this one */
+}  NODE;
+
+/* state of the query parser */
+typedef struct
+{
+   char       *buf;            /* current position in the query text */
+   int4        state;          /* WAITOPERAND or WAITOPERATOR */
+   int4        count;          /* number of currently open parentheses */
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   int4        lenop;          /* allocated size of op */
+   int4        sumlen;         /* total length of stored operands incl. their NULs */
+   char       *op;             /* buffer holding the NUL-separated operands */
+   char       *curop;          /* next free position in op */
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional weight suffix of the form ":abcd" (case-insensitive).
+ *
+ * *weight is set to a bit mask: a -> bit 3, b -> bit 2, c -> bit 1,
+ * d -> bit 0; it is 0 when buf does not start with ':'.
+ * Returns a pointer to the first character that is not part of the suffix.
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+   *weight = 0;
+
+   if ( *buf != ':' )
+       return buf;
+
+   buf++;
+   while( *buf ) {
+       /* tolower() needs an unsigned char value; plain char may be negative */
+       switch(tolower((unsigned char) *buf)) {
+           case 'a': *weight |= 1<<3; break;
+           case 'b': *weight |= 1<<2; break;
+           case 'c': *weight |= 1<<1; break;
+           case 'd': *weight |= 1;    break;
+           default: return buf;
+       }
+       buf++;
+   }
+
+   return buf;
+}
+
+/*
+ * get token from query string
+ *
+ * Returns the token type.  For OPR, *val holds the operator character.
+ * For VAL, *strval / *lenval describe the operand text and *weight its
+ * weight mask.  Blanks between tokens are skipped.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               if (*(state->buf) == '!')
+               {
+                   /* negation is a prefix operator: stay in WAITOPERAND */
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* hand over to the value parser, then pick up any ":weight" suffix */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   /* more ')' than '(' is an error */
+                   (state->buf)++;
+                   state->count--;
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* end of input with parentheses still open is an error */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       /* only reached for a skipped blank */
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Prepend a new node to the parser's list (reverse polish notation, newest
+ * node first) and count it.  Reports an error if distance or lenval reach
+ * their limits.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node;
+
+   node = (NODE *) palloc(sizeof(NODE));
+   node->weight = weight;
+   node->type = type;
+   node->val = val;
+
+   /* both values are stored in int2 fields below */
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+   node->distance = distance;
+   node->length = lenval;
+
+   /* link in at the head of the list */
+   node->next = state->str;
+   state->str = node;
+   state->num += 1;
+}
+
+/*
+ * Operand handler used for tsquery parsing: the operand is stored as
+ * given.  A node carrying the operand's CRC32 is pushed, and the operand
+ * text plus a terminating NUL is appended to the operand buffer, which is
+ * doubled in size as often as needed.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   int4        used;
+
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   /* the node records where the text will start in the operand buffer */
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             state->curop - state->op, lenval, weight);
+
+   used = state->curop - state->op;
+   while (used + lenval + 1 >= state->lenop)
+   {
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+       /* the buffer may have moved */
+       state->curop = state->op + used;
+   }
+
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop[lenval] = '\0';
+   state->curop += lenval + 1;
+   state->sumlen += lenval + 1;
+}
+
+/*
+ * Operand handler used for morph parsing: the operand text is run through
+ * parsetext_v2() with the parser state's configuration, and every word it
+ * yields is pushed as a VAL.  Words after the first are joined to the
+ * previous ones with '&'.  If no word is produced, a VALTRUE node is
+ * pushed instead (the define calls it "for stop words").
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT         prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   /* presumably prs.curwords is the number of words parsetext_v2 stored */
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 )
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+/* maximum number of pending operators per parenthesis level */
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens until END or a closing parenthesis, pushing operands through
+ * pushval and operators through pushquery().  Operators that cannot be
+ * emitted yet wait on a local stack.  Recurses for every '('.
+ * Returns END on success; syntax errors are raised with elog(ERROR).
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* an operand completes every pending '&' and '!' */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               /* a '|' arriving while operators are pending is emitted at once */
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* parse the parenthesized part, then treat it like an operand */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* flush what is still pending at this level and return to the caller */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush the remaining operators */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/* context handed to checkcondition_str(): the tsvector being tested plus the query's operand text */
+typedef struct
+{
+   WordEntry  *arrb;           /* first word entry */
+   WordEntry  *arre;           /* one past the last word entry */
+   char       *values;         /* string area the word entries' pos refers to */
+   char       *operand;        /* string area the query items' distance refers to */
+}  CHKVAL;
+
+/*
+ * Compare a word entry with a query item: shorter sorts first; values of
+ * equal length are compared with strncmp().
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   const char *word;
+   const char *oper;
+
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   word = &(chkval->values[ptr->pos]);
+   oper = &(chkval->operand[item->distance]);
+   return strncmp(word, oper, item->length);
+}
+
+/*
+ * check weight info
+ *
+ * Returns true if at least one position of the word entry carries a weight
+ * selected by the query item's weight mask.  The position list is read from
+ * the string area after the word text, rounded up by SHORTALIGN: a uint16
+ * count followed by that many WordEntryPos items.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       /* the mask has one bit per weight value, as built by get_weight() */
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false;
+}
+
+/*
+ * Is the query item's value present in the word array?
+ *
+ * Binary search with ValCompare().  When the value is found, the item has
+ * a weight mask and the entry has positions, the result is decided by
+ * checkclass_str(); otherwise finding the value is enough.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *chk = (CHKVAL *) checkval;
+   WordEntry  *lo = chk->arrb;
+   WordEntry  *hi = chk->arre;
+
+   /* the candidate, if present, is always within [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = ValCompare(chk, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+       {
+           if (val->weight && mid->haspos)
+               return checkclass_str(chk, mid, val);
+           return true;
+       }
+   }
+
+   return (false);
+}
+
+/*
+ * check for boolean condition
+ *
+ * Recursively evaluates the query tree rooted at curitem.  Operands are
+ * tested with chkcond(checkval, item).  One child of an operator is the
+ * next item (curitem + 1), the other is at curitem + curitem->left.
+ * When calcnot is false, every '!' node evaluates to true without looking
+ * at its operand.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   /* NOTE(review): only VAL is treated as an operand here; VALTRUE items would fall through to the operator branches - confirm they cannot reach this function */
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+   else if (curitem->val == (int4) '!')
+   {
+       return (calcnot) ?
+           ((TS_execute(curitem + 1, checkval, calcnot, chkcond)) ? false : true)
+           : true;
+   }
+   else if (curitem->val == (int4) '&')
+   {
+       /* short-circuit: the second child is only evaluated if the first is true */
+       if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+           return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+       else
+           return false;
+   }
+   else
+   {                           /* |-operator */
+       if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+           return true;
+       else
+           return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+   }
+   return false;
+}
+
+/*
+ * boolean operations
+ *
+ * rexectsq is exectsq with its two arguments swapped.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   return DirectFunctionCall2(
+                              exectsq,
+                              PG_GETARG_DATUM(1),
+                              PG_GETARG_DATUM(0)
+       );
+}
+
+/*
+ * Test a tsvector (argument 0) against a query (argument 1) and return the
+ * boolean result.  An empty vector or an empty query gives false.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   CHKVAL      chkval;
+   bool        result;
+
+   if (!val->size || !query->size)
+   {
+       PG_FREE_IF_COPY(val, 0);
+       PG_FREE_IF_COPY(query, 1);
+       PG_RETURN_BOOL(false);
+   }
+
+   /* describe the vector's word entries and both string areas for checkcondition_str */
+   chkval.arrb = ARRPTR(val);
+   chkval.arre = chkval.arrb + val->size;
+   chkval.values = STRPTR(val);
+   chkval.operand = GETOPERAND(query);
+   /* calcnot is true: '!' nodes are really evaluated */
+   result = TS_execute(
+                    GETQUERY(query),
+                    &chkval,
+                    true,
+                    checkcondition_str
+       );
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * Walk the polish-notation array starting at ptr[*pos] and fill in the
+ * 'left' field of every item of that subtree:
+ *   - values (VAL / VALTRUE) get 0,
+ *   - '!' gets 1,
+ *   - any other operator gets the distance from itself to its second
+ *     operand, i.e. the number of items taken by itself plus its first
+ *     operand.
+ * On return *pos is the index just past the subtree.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+   int4        self = *pos;
+   ITEM       *node = ptr + self;
+
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+
+   /* every kind of item consumes exactly its own slot first */
+   (*pos)++;
+
+   if (node->type == VAL || node->type == VALTRUE)
+   {
+       node->left = 0;
+       return;
+   }
+
+   if (node->val == (int4) '!')
+   {
+       node->left = 1;
+       findoprnd(ptr, pos);
+       return;
+   }
+
+   /* binary operator: first operand, record the offset, second operand */
+   findoprnd(ptr, pos);
+   node->left = *pos - self;
+   findoprnd(ptr, pos);
+}
+
+
+/*
+ * input
+ *
+ * Parse the query text in buf into a freshly palloc'd QUERYTYPE.
+ *
+ * buf      - query text to parse
+ * pushval  - callback handed to makepol(); how it is invoked is decided
+ *            there, not in this function
+ * cfg_id   - stored in the parser state for use during parsing
+ *
+ * Raises ERROR "Empty query" when parsing produced no items.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   /* state.str is a linked list; each node is copied out and freed in turn */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   /* the operand bytes are placed right after the ITEM array */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   /* dump every item into pbuf (fixed 16384 bytes, no overflow check) */
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * Input function that takes the operands as written (no morphology):
+ * parses argument 0 with the pushval_asis callback and configuration id 0.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   char       *input = (char *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *parsed = queryin(input, pushval_asis, 0);
+
+   PG_RETURN_POINTER(parsed);
+}
+
+/*
+ * out function
+ */
+typedef struct
+{
+   ITEM       *curpol;
+   char       *buf;
+   char       *cur;
+   char       *op;
+   int4        buflen;
+}  INFIX;
+
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the subtree starting at in->curpol into in->buf and advances
+ * in->curpol past it.  'first' is true only for the outermost call (and
+ * for the operand of a parenthesized '!'); an '|' expression printed with
+ * first == false is wrapped in "( ... )".
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       /* worst case: every byte escaped, two quotes, ':' plus four weight letters */
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           /* a quote inside the operand is escaped with a backslash */
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       /* weight is a bitmask: bit 3 prints 'A', bit 2 'B', bit 1 'C', bit 0 'D' */
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       /* an operator under '!' is parenthesized */
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm;
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       /*
+        * The operand stored first in the array is printed last, so it is
+        * rendered into a scratch buffer of its own.
+        */
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function: render the query (argument 0) as a NUL-terminated
+ * string in infix notation.  An empty query yields an empty string.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+
+   if (query->size == 0)
+   {
+       char       *b = palloc(1);
+
+       *b = '\0';
+       /* release the detoasted copy on this path too, as the main path does */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(b);
+   }
+   nrm.curpol = GETQUERY(query);
+   nrm.buflen = 32;
+   nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+   *(nrm.cur) = '\0';
+   nrm.op = GETOPERAND(query);
+   infix(&nrm, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(nrm.buf);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * Returns a text value: empty for an empty query, the single character
+ * 'T' when clean_NOT_v2() leaves nothing of the tree, and otherwise the
+ * infix rendering of the tree that remains.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+   text       *res;
+   ITEM       *q;
+   int4        len;
+
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       /* release the detoasted copy on this path too */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(res);
+   }
+
+   q = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (!q)
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+   else
+   {
+       int4        outlen;
+
+       nrm.curpol = q;
+       nrm.buflen = 32;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+       *(nrm.cur) = '\0';
+       nrm.op = GETOPERAND(query);
+       infix(&nrm, true);
+
+       /* text values carry their length, so the NUL is not copied */
+       outlen = nrm.cur - nrm.buf;
+       res = (text *) palloc(outlen + VARHDRSZ);
+       VARATT_SIZEP(res) = outlen + VARHDRSZ;
+       memcpy(VARDATA(res), nrm.buf, outlen);
+       pfree(nrm.buf);
+       pfree(q);
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * Build a query from text (argument 1) using the configuration whose id
+ * is argument 0, parsing with the pushval_morph callback.
+ *
+ * The parsed tree is then passed through clean_fakeval_v2().  If nothing
+ * is left the query is turned into an empty one (size 0, header-only
+ * length); otherwise the cleaned items are copied back over the start of
+ * the item array.  The size field is left as it was in that case, so the
+ * operand area stays where GETOPERAND() expects it.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text       *src = PG_GETARG_TEXT_P(1);
+   char       *cstr = text2char(src);
+   QUERYTYPE  *query;
+   ITEM       *cleaned;
+   int4        nitems;
+
+   PG_FREE_IF_COPY(src,1);
+
+   query = queryin(cstr, pushval_morph, PG_GETARG_INT32(0));
+   cleaned = clean_fakeval_v2(GETQUERY(query), &nitems);
+
+   if (cleaned)
+   {
+       memcpy((void *) GETQUERY(query), (void *) cleaned, nitems * sizeof(ITEM));
+       pfree(cleaned);
+   }
+   else
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+   }
+
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * Same as to_tsquery, but the configuration is given by name (argument 0,
+ * text) and resolved with name2id_cfg(); argument 1 is the query text.
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+   
+   /*
+    * name was fetched from argument 0, so it must be compared against
+    * argument 0 here.  Comparing against argument 1 made the macro pfree
+    * the caller's own datum whenever no detoasted copy had been made.
+    */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * Same as to_tsquery, using the configuration id returned by
+ * get_currcfg(); argument 0 is the query text.
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum cfg = Int32GetDatum( get_currcfg() );
+   Datum res = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));
+
+   PG_RETURN_DATUM(res);
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * item in polish notation with back link
+ * to left operand
+ */
+typedef struct ITEM
+{
+   /* kind of item: one of the token codes defined in this header (VAL, OPR, VALTRUE, ...) */
+   int8        type;
+   /* bitmask of weight classes attached to a value; 0 means none */
+   int8        weight;
+   /* for an operator: offset, in items, from the operator to its other operand */
+   int2        left;
+   /* for an operator: the operator character ('!', '&' or '|') */
+   int4        val;
+   /* user-friendly value, must correlate with WordEntry */
+   /* length: operand length; distance: offset of the operand inside the operand area */
+   uint32  
+       unused:1,
+       length:11,
+       distance:20;
+}  ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ *
+ * len  - total size of the value in bytes
+ * size - number of ITEMs in the array
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  QUERYTYPE;
+
+/*
+ * Layout helpers.  Every macro argument and every expansion is fully
+ * parenthesized so the macros can be used with arbitrary expressions and
+ * combined with postfix operators.
+ */
+/* bytes taken by the two int4 header fields */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+/* total bytes needed for 'size' items plus 'lenofoperand' bytes of operands */
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+/* pointer to the first ITEM */
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+/* pointer to the operand area, which follows the ITEM array */
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+/* true when character x is one of the query syntax characters ! & | ( ) */
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+/*
+ * Token / item type codes.  VAL, OPR and VALTRUE are stored in ITEM.type;
+ * the others are presumably parser-only token kinds -- TODO confirm
+ * against the parser.
+ */
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+/*
+ * Evaluate the query tree starting at curitem.  chkcond is called for
+ * every value item with the opaque checkval pointer; calcnot tells
+ * whether '!' nodes are evaluated.
+ */
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevance ranking
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include <math.h>
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+/* SQL-callable entry points of this file */
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+/* default weight table, indexed by the weight field of a WordEntryPos */
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+/*
+ * Weight of the position wep points to.  NOTE: relies on a variable named
+ * 'w' (the weight table) being in scope at the point of use.
+ */
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+/* normalization method used when the caller does not pass one */
+#define DEF_NORM_METHOD    0
+
+/*
+ * Weight of a pair of words that are w positions apart.  Distances above
+ * 100 get a fixed, practically zero weight.
+ */
+static float4 word_distance ( int4 w ) {
+   return ( w>100 ) ?
+       1e-30 :
+       1.0/(1.005+0.05*exp( ((float4)w)/1.5-2) );
+}
+
+/*
+ * Length of the document in positions: the sum of the position counts of
+ * all entries, where an entry without positions counts as one.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry   *cur, *stop=(WordEntry*)STRPTR(t);
+   int total = 0;
+
+   for(cur=ARRPTR(t); cur < stop; cur++) {
+       int npos = POSDATALEN(t, cur);
+
+       total += ( npos == 0 ) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Compare a vector entry with a query operand: shorter sorts first, and
+ * strings of equal length are ordered by strncmp over that length.
+ * eval / qval are the string areas of the vector and of the query.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary search of the vector's entry array for the query operand 'item'.
+ * Returns the matching entry, or NULL when the word is not in the vector.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+   WordEntry  *lo = ARRPTR(t);
+   WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+   /* the search window is the half-open range [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+       if (cmp == 0)
+           return mid;
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else
+           hi = mid;
+   }
+
+   return NULL;
+}
+
+/*
+ * Stand-in position list for entries that carry no position data.  Users
+ * overwrite the start of element 0 with a uint16 count
+ * (lengthof(POSNULL)-1) and then treat POSNULL+1 as the position array,
+ * mimicking the stored layout "count followed by positions".
+ * NOTE(review): the initializer order depends on the field order of
+ * WordEntryPos, which is declared elsewhere -- confirm.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank for a query whose top-level operator is '&'.
+ *
+ * For every pair of distinct query words found in the document, every
+ * pair of their positions contributes
+ *     sqrt( weight(pos1) * weight(pos2) * word_distance(dist) )
+ * and the contributions are combined as 1 - (1-res)*(1-curw).
+ * Returns a negative value when no pair contributed.
+ *
+ * w is the weight table; it is read through the wpos() macro.
+ *
+ * The loop headers were restored from a garbled copy of this file; the
+ * bounds are the variables each loop body indexes with (q->size, i, dimt,
+ * lenct).
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* pos[i] stays NULL for items that are operators or are not in the document */
+   memset(pos,0,sizeof(uint16*) * q->size);
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+
+       /* a position list is a uint16 count followed by the positions */
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       /* pair the current word with every earlier word that was found */
+       for( k=0; k<i; k++) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   /* distance 0 only counts when one side has no real positions */
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw; 
+                       if ( !dist ) dist=MAXENTRYPOS;  
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res; 
+}
+
+/*
+ * Rank for a query whose top-level operator is not '&'.
+ *
+ * Every position of every query word found in the document contributes
+ * its weight; contributions are combined as 1 - (1-res)*(1-weight).
+ * Returns a negative value when no query word occurs in the document.
+ *
+ * w is the weight table; it is read through the wpos() macro.
+ *
+ * The loop headers were restored from a garbled copy of this file; the
+ * bounds are the variables each loop body indexes with (q->size, dimt).
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       /* entries without position data use the POSNULL stand-in list */
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Compute the rank of document t for query q with weight table w.
+ *
+ * Picks calc_rank_and when the first item of the query is the '&'
+ * operator and calc_rank_or otherwise, then normalizes by document
+ * length:
+ *   method 0 - no normalization
+ *   method 1 - divide by log(length)
+ *   method 2 - divide by length
+ * Any other method raises ERROR.  Returns 0 for an empty document or
+ * query.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   /* nothing matched: report a tiny positive rank */
+   if ( res < 0 )
+       res = 1e-20;
+
+   switch(method) {
+       case 0: break;
+       case 1:
+           {
+               int doclen = cnt_length(t);
+
+               /*
+                * log(1) is 0, so dividing for a one-position document
+                * would produce infinity; leave such a rank unnormalized.
+                */
+               if ( doclen > 1 )
+                   res /= log((float)doclen);
+           }
+           break;
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   return res;
+}
+
+/*
+ * rank(weights float4[], tsvector, query [, method int4])
+ *
+ * The weight array must be one-dimensional and hold at least
+ * lengthof(weights) elements.  A negative element selects the built-in
+ * default for that slot; an element above 1.0 raises ERROR.
+ *
+ * The loop header was restored from a garbled copy of this file; its
+ * bound is the size of ws[], the array the loop fills.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 ) 
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+        elog(ERROR,"Array of weight is too short");
+
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 ) 
+           elog(ERROR,"Weight out of range");
+   } 
+
+   /* the normalization method is an optional fourth argument */
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method); 
+       
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank(tsvector, query [, method int4]) using the built-in weight table.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   /* the normalization method is an optional third argument */
+   int method = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   float res = calc_rank(weights, txt, query, method);
+
+   PG_FREE_IF_COPY(txt, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/* one occurrence of a query item in the document */
+typedef struct {
+   ITEM    *item;  /* the query item that occurs here */
+   int32   pos;    /* its position in the document */
+} DocRepresentation;
+
+/*
+ * qsort comparator: orders DocRepresentation entries by position.
+ * Equal positions compare as 0, as the qsort contract requires; the
+ * comparator must give consistent answers for (a,b) and (b,a).
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+/* a window of 'len' consecutive DocRepresentation entries starting at 'doc' */
+typedef struct {
+   DocRepresentation *doc;
+   int len;
+}  ChkDocR;
+
+/*
+ * TS_execute callback: a query item is satisfied when it occurs anywhere
+ * inside the window described by checkval (a ChkDocR).
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *window = (ChkDocR*)checkval;
+   int idx;
+
+   for(idx=0; idx < window->len; idx++) {
+       if ( window->doc[idx].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next cover in the position-sorted document representation.
+ *
+ * doc / len - array of query-word occurrences and its length
+ * pos       - in: index in doc to start from; out: index to resume from
+ * p, q      - out: first and last document position of the cover found;
+ *             on entry *q holds the end of the previously reported cover
+ *
+ * Forward pass: for every value item of the query, take its first
+ * occurrence at or after *pos; *q becomes the largest such position.
+ * Backward pass: walking back from there, take for every item its nearest
+ * occurrence; *p becomes the smallest such position.  The window is
+ * accepted when the query evaluates to true over it (NOT is not
+ * evaluated); otherwise the search continues from the next start index.
+ *
+ * Returns false when no further cover exists.
+ *
+ * The loop headers were restored from a garbled copy of this file; the
+ * bounds are query->size for the item loops and len for the forward scan.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   /* f is only read after the backward pass has set it (then *p <= *q) */
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* forward pass: find the right edge */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               } 
+               break;
+           } 
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   } 
+
+   /* backward pass: find the left edge */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+ 
+   if ( *p<=*q ) {
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       /* the next search starts right after the left edge of this window */
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { 
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ 
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q); 
+   }
+ 
+   return false;
+}
+
+/*
+ * Build the document representation used by the cover search: one
+ * DocRepresentation per occurrence of every query value item in txt,
+ * sorted by position.
+ *
+ * *doclen receives the number of entries.  Returns a palloc'd array the
+ * caller must pfree, or NULL when no query word occurs in the document.
+ *
+ * The loop headers were restored from a garbled copy of this file; the
+ * bounds are the variables each loop body indexes with (query->size,
+ * dimt).
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       /* entries without position data use the POSNULL stand-in list */
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until this word's positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+   
+   if ( cur>0 ) {
+       if ( cur>1 ) 
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+   
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd(K int4, tsvector, query [, method int4])
+ *
+ * Cover-density rank: every cover found by Cover() adds 1.0 when it spans
+ * at most K positions and K/span otherwise.  K <= 0 selects the default
+ * of 4.  The sum is then normalized by document length exactly as in
+ * calc_rank (methods 0, 1, 2; anything else raises ERROR).
+ * Returns 0 when no query word occurs in the document.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len,cur;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;    
+   while( Cover(doc, len, query, &cur, &p, &q) ) 
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   switch(method) {
+       case 0: break;
+       case 1:
+           {
+               int doclen = cnt_length(txt);
+
+               /*
+                * log(1) is 0, so dividing for a one-position document
+                * would produce infinity; leave such a rank unnormalized.
+                */
+               if ( doclen > 1 )
+                   res /= log((float)doclen);
+           }
+           break;
+       case 2: res /= (float)cnt_length(txt); break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd(tsvector, query [, method int4]): calls rank_cd with K = -1,
+ * which makes rank_cd fall back to its default K.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   /* the normalization method is an optional third argument */
+   Datum method = ( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD);
+   Datum res = DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   );
+
+   PG_RETURN_DATUM(res);
+}
+
+/**************debug*************/
+
+/* one word occurrence of the document, used by get_covers */
+typedef struct {
+   char    *w;         /* start of the word's bytes (not NUL-terminated) */
+   int2    len;        /* length of the word in bytes */
+   int2    pos;        /* position of this occurrence in the document */
+   int2    start;      /* number of the cover that starts here, 0 if none */
+   int2    finish;     /* number of the cover that ends here, 0 if none */
+} DocWord;
+
+/*
+ * qsort comparator: orders DocWord entries by position.
+ * Equal positions compare as 0, as the qsort contract requires; the
+ * comparator must give consistent answers for (a,b) and (b,a).
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers(tsvector, query) returns text
+ *
+ * Debug helper: prints the words of the document in position order and
+ * brackets every cover found by Cover() as "{N word word }N", where N is
+ * the running number of the cover.  Every entry of the vector must carry
+ * position data, otherwise ERROR "No pos info" is raised.  Returns an
+ * empty text when no query word occurs in the document.
+ *
+ * The loop headers were restored from a garbled copy of this file; the
+ * bounds are txt->size for the entry loops, the entry's position count
+ * for the inner loop and dlen for the scans over dw.  In the two scans
+ * the bounds test now comes first, so dwptr is never read past the end
+ * of the array.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences of the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size estimate */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark the first and last word of every cover */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   /* the real length is whatever was written */
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/*
+ * Node of the pointer-based query tree built by maketree().  valnode
+ * points at the ITEM this node was made from; left / right are the
+ * operand subtrees (both NULL for a value, left NULL for '!').
+ */
+typedef struct NODE
+{
+   struct NODE *left;
+   struct NODE *right;
+   ITEM       *valnode;
+}  NODE;
+
+/*
+ * Build a pointer-based tree from the polish-notation array starting at
+ * 'in'.  For an operator the right subtree comes from the item directly
+ * after it, and every operator except '!' also gets a left subtree from
+ * in + in->left.  The nodes point into the array; they do not copy it.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   node->valnode = in;
+   node->left = NULL;
+   node->right = NULL;
+
+   /* anything that is not an operator is a leaf */
+   if (in->type != OPR)
+       return node;
+
+   node->right = maketree(in + 1);
+   node->left = (in->val == (int4) '!') ? NULL : maketree(in + in->left);
+
+   return node;
+}
+
+/* growable output array used while flattening a tree */
+typedef struct
+{
+   ITEM       *ptr;            /* the array itself */
+   int4        len;            /* allocated number of items */
+   int4        cur;            /* number of items used so far */
+}  PLAINTREE;
+
+/*
+ * Append the subtree rooted at 'node' to state->ptr in polish notation,
+ * recomputing the 'left' offsets, and free the tree nodes on the way.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   ITEM       *src = node->valnode;
+   int4        slot;
+
+   /* double the array when it is full */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+
+   slot = state->cur;
+   memcpy((void *) &(state->ptr[slot]), (void *) src, sizeof(ITEM));
+   state->cur++;
+
+   if (src->type != VAL)
+   {
+       if (src->val == (int4) '!')
+       {
+           state->ptr[slot].left = 1;
+           plainnode(state, node->right);
+       }
+       else
+       {
+           /*
+            * The array may have moved while the right subtree was
+            * written, so the slot is addressed by index, never through a
+            * saved pointer.
+            */
+           plainnode(state, node->right);
+           state->ptr[slot].left = state->cur - slot;
+           plainnode(state, node->left);
+       }
+   }
+
+   pfree(node);
+}
+
+/*
+ * Flatten a tree back into a palloc'd polish-notation array.
+ * *len receives the number of items.  Returns NULL (and *len == 0) when
+ * root is NULL or its item is neither a value nor an operator.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   pl;
+
+   pl.ptr = NULL;
+   pl.cur = 0;
+   pl.len = 16;
+
+   if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
+       plainnode(&pl, root);
+   }
+
+   *len = pl.cur;
+   return pl.ptr;
+}
+
+/*
+ * Free a whole tree of NODEs; a NULL argument is accepted.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive calls accept NULL children themselves */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Remove every '!' subtree from the tree.
+ * This view of the query is useful for debugging, and it is also the
+ * view used when searching in an index.
+ * A '!' subtree is treated as always TRUE.
+ *
+ * Returns the cleaned tree, or NULL when the whole subtree is always
+ * TRUE.  Nodes that are dropped are freed here.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* '|' with an always-true side is always true itself */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       NODE       *res = node;
+
+       /* '&': an always-true side is dropped and the other side replaces the node */
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a new polish-notation array for the query at ptr with all '!'
+ * subtrees removed (see clean_NOT_intree).  *len receives the number of
+ * items; the result is NULL when nothing remains.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree = maketree(ptr);
+   NODE       *pruned = clean_NOT_intree(tree);
+
+   return plaintree(pruned, len);
+}
+
+/* three-valued outcome of folding a subtree */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Remove from the query tree the values that are always present in
+ * the text (stop words, stored as VALTRUE items).
+ *
+ * Returns the cleaned subtree.  When the subtree folds to a constant,
+ * NULL is returned and *result is set to V_TRUE or V_FALSE; otherwise
+ * *result is left untouched, so callers initialize it to V_UNKNOWN.
+ * Nodes that are dropped are freed here.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       /* negation of a constant is the opposite constant */
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       /*
+        * '|': TRUE on either side makes the whole node TRUE, FALSE on
+        * both sides makes it FALSE, and a single FALSE side is dropped
+        * in favour of the other side.
+        */
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       NODE       *res = node;
+
+       /*
+        * '&': FALSE on either side makes the whole node FALSE, TRUE on
+        * both sides makes it TRUE, and a single TRUE side is dropped in
+        * favour of the other side.
+        */
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a new polish-notation array for the query at ptr with stop-word
+ * values folded away (see clean_fakeval_intree).  *len receives the
+ * number of items.
+ *
+ * When the whole query folds to a constant a NOTICE is emitted, *len is
+ * set to 0 and NULL is returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &verdict);
+
+   /* something searchable is left: flatten it back into an array */
+   if (verdict == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/*
+ * Query clean-up entry points.  ITEM and int4 are not declared in this
+ * header; the including file must already have their definitions in scope.
+ */
+/* NOTE(review): presumably removes NOT operators from the query -- the definition is not visible here, confirm. */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+/*
+ * Runs clean_fakeval_intree() over the query and returns the flattened
+ * result (count in *len), or NULL with *len = 0 when nothing usable remains.
+ */
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/*
+ * qsort()/bsearch() comparator: orders SNMapEntry items by strcmp() of
+ * their keys.
+ */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *lhs = (const SNMapEntry *) a;
+   const SNMapEntry *rhs = (const SNMapEntry *) b;
+
+   return strcmp(lhs->key, rhs->key);
+}
+
+/*
+ * Append (key -> value) to the map.  The key is copied with strdup(), so the
+ * caller keeps ownership of its own string.  Storage comes from
+ * realloc()/strdup(); an allocation failure is reported via
+ * elog(ERROR, ...).
+ */
+void
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   /* Array full: start with 16 slots, double afterwards. */
+   if (map->len >= map->reallen) {
+       int newcap = 16;
+       SNMapEntry *grown;
+
+       if (map->reallen)
+           newcap = 2 * map->reallen;
+       grown = (SNMapEntry *) realloc(map->list, sizeof(SNMapEntry) * newcap);
+       if (grown == NULL)
+           elog(ERROR, "No memory");
+       map->reallen = newcap;
+       map->list = grown;
+   }
+
+   slot = &map->list[map->len];
+   slot->key = strdup(key);
+   if (slot->key == NULL)
+       elog(ERROR, "No memory");
+   slot->value = value;
+   map->len++;
+
+   /* Re-sort by key so that findSNMap() can use bsearch(). */
+   if (map->len > 1)
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Same as addSNMap(), but the key arrives as a text value: it is converted
+ * with text2char(), added, and the temporary C string is pfree()d.
+ */
+void
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *cstr = text2char(key);
+
+   addSNMap(map, cstr, value);
+   pfree(cstr);
+}
+
+/*
+ * Look up key with bsearch() over the key-sorted entry array.
+ * Returns the stored value, or 0 when the map is empty or the key is absent.
+ */
+Oid
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if (!map->list || map->len == 0)
+       return 0;
+
+   /* Only the key of the probe is looked at by the comparator. */
+   probe.key = key;
+   probe.value = 0;
+   hit = (SNMapEntry *) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if (hit == NULL)
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Same as findSNMap(), but the key arrives as a text value: it is converted
+ * with text2char(), looked up, and the temporary C string is pfree()d.
+ * Returns the stored Oid, or 0 when the key is not present.
+ */
+Oid
+findSNMap_t( SNMap *map, text *key ) {
+   char *k = text2char(key);
+   /* Keep the result as Oid: routing it through a signed int (as before)
+    * is a needless signed/unsigned round trip. */
+   Oid res = findSNMap(map, k);
+
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release every duplicated key and the entry array itself, then zero the
+ * whole SNMap structure.
+ */
+void freeSNMap( SNMap *map ) {
+   if (map->list != NULL) {
+       int i;
+
+       for (i = 0; i < map->len; i++) {
+           if (map->list[i].key != NULL)
+               free(map->list[i].key);
+       }
+       free(map->list);
+   }
+   memset(map, 0, sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One (string key -> Oid) pair stored in an SNMap. */
+typedef struct {
+   char    *key;   /* key string; addSNMap() stores a strdup() copy here */
+   Oid value;      /* value associated with key */
+} SNMapEntry;
+
+/* Growable array of SNMapEntry, kept sorted by key by addSNMap(). */
+typedef struct {
+   int len;            /* number of entries in use */
+   int reallen;        /* number of entries allocated in list */
+   SNMapEntry  *list;  /* entry array (realloc()ed by addSNMap), or NULL */
+} SNMap;
+
+/* Insert a pair; the _t variant takes the key as a text value. */
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+/* Look up a key; returns 0 when it is not found. */
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+/* Free all storage held by the map and reset it to all-zero. */
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a stemmer environment with S_size string slots, I_size integers
+ * and B_size booleans (all zero-initialised by calloc).
+ *
+ * Returns the new environment, or NULL when one of the calloc() calls fails;
+ * in that case everything allocated so far is released again.  Previously a
+ * failed calloc() led straight to a NULL dereference.  Callers are expected
+ * to check the result for NULL.
+ *
+ * The *_size fields are only set after the matching array was allocated, so
+ * SN_close_env() never touches an array that does not exist.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    /* Frees whatever was set up before the failure, then z itself. */
+    SN_close_env(z);
+    return NULL;
+}
+
+/*
+ * Release an environment obtained from SN_create_env(): the S strings and
+ * their array, the I and B arrays, the current string p, and z itself.
+ * Passing NULL is a no-op (previously it crashed), which mirrors free().
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    if (z == NULL) return;
+    if (z->S_size)
+    {
+        {   int i;
+            for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
+        }
+        free(z->S);
+    }
+    if (z->I_size) free(z->I);
+    if (z->B_size) free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Load a new word into the environment: replace_s() is asked to substitute
+ * the range 0..z->l of the current string with the `size` symbols at `s`,
+ * and the cursor z->c is reset to 0.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/*
+ * State of one stemmer instance, created by SN_create_env() and released by
+ * SN_close_env().
+ */
+struct SN_env {
+    symbol * p;     /* current string, obtained from create_s() */
+    /* c: cursor; l: forward limit; lb: backward limit; bra/ket: start and
+       end marks of the current slice.  NOTE(review): `a` is not used by the
+       code visible here -- confirm its role in utilities.c. */
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;     /* element counts of S, I, B */
+    symbol * * S;   /* string variables, each from create_s() */
+    int * I;        /* integer variables */
+    symbol * B;     /* boolean flags, one symbol each */
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+/*
+ * Character groupings passed to in_grouping()/out_grouping() together with a
+ * min..max code range.  NOTE(review): the encoding is implemented outside
+ * this file; it appears to be a bitmap with bit 0 of byte 0 standing for
+ * `min` -- confirm against utilities.c.  Read that way:
+ *   g_v        (used with 97..121): a e i o u y
+ *   g_v_WXY    (used with 89..121): Y a e i o u w x y
+ *   g_valid_LI (used with 99..116): c d e g h k m n r t
+ */
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/*
+ * Prelude (generated from Snowball source lines 24-26).
+ *
+ * Clears the Y_found flag (z->B[0]).  If the text at the cursor is 'y' and
+ * the in_grouping(g_v) test right after it succeeds, that 'y' is rewritten
+ * to 'Y'.  Then, scanning forward, every 'y' found directly after a position
+ * where in_grouping(g_v) succeeds is rewritten to 'Y' as well.  Each rewrite
+ * sets Y_found.  The cursor is restored before returning; always returns 1.
+ *
+ * NOTE(review): eq_s/in_grouping/slice_from_s are defined outside this file;
+ * the description assumes they advance the cursor on success -- confirm.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1: /* emitted by the generator; no goto in this function targets it */
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Compute the two region marks used by the later steps: z->I[0] (p1) and
+ * z->I[1] (p2).  Both start out as the limit z->l.  The cursor is moved
+ * either past a match from a_0 ("gener") or past the first
+ * in_grouping(g_v) / out_grouping(g_v) pair, and that position becomes p1;
+ * after one more such pair the position becomes p2.  Running out of input at
+ * any point leaves the remaining mark(s) at z->l.  The cursor is restored;
+ * always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Backward test used by Step_1b and Step_5.  Returns 1 when, reading
+ * backwards from the cursor, either
+ *   - out_grouping_b(g_v_WXY), in_grouping_b(g_v) and out_grouping_b(g_v)
+ *     all succeed in that order, or
+ *   - out_grouping_b(g_v) and in_grouping_b(g_v) succeed and the cursor has
+ *     then reached the backward limit z->lb.
+ * Returns 0 otherwise.  The cursor is left wherever the tests moved it.
+ */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* Region test R1: 1 when the cursor is at or beyond mark p1 (z->I[0]), else 0. */
+static int r_R1(struct SN_env * z) {
+    return (z->c >= z->I[0]) ? 1 : 0;
+}
+
+/* Region test R2: 1 when the cursor is at or beyond mark p2 (z->I[1]), else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->c >= z->I[1]) ? 1 : 0;
+}
+
+/*
+ * Step 1a: suffixes looked up backwards in a_1.
+ *   "sses"        -> "ss"
+ *   "ied", "ies"  -> "ie" when exactly one symbol precedes the suffix (the
+ *                    cursor hits z->lb after one step back), otherwise "i"
+ *   "s"           -> deleted, provided that after skipping the symbol just
+ *                    before it an in_grouping_b(g_v) match is found further
+ *                    back
+ *   "ss", "us"    -> matched but left unchanged (result -1 hits no case)
+ * Returns 0 when nothing in a_1 matches or the "s" condition fails, else 1.
+ */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1b: suffixes looked up backwards in a_3.
+ *   "eed", "eedly"               -> "ee", only inside R1
+ *   "ed", "edly", "ing", "ingly" -> deleted, only if an in_grouping_b(g_v)
+ *                                   match exists somewhere before the suffix
+ * After a deletion the new ending is looked up in a_2:
+ *   "at", "bl", "iz"             -> an 'e' is inserted at the cursor
+ *   one of the doubled pairs     -> the last symbol is deleted
+ *   anything else (empty entry)  -> an 'e' is inserted if the cursor sits
+ *                                   exactly on p1 (z->I[0]) and r_shortv()
+ *                                   succeeds
+ * Returns 0 as soon as a lookup or condition fails, else 1.
+ */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1c: a final 'y' or 'Y' becomes 'i' when the symbol before it passes
+ * out_grouping_b(g_v) and that symbol is not the first one of the word (the
+ * cursor must still be above z->lb after stepping over it).
+ * Returns 0 when any of these conditions fails, else 1.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/*
+ * Step 2: suffixes looked up backwards in a_4; the match must lie in R1.
+ *   tional -> tion          enci -> ence           anci -> ance
+ *   abli -> able            entli -> ent           izer, ization -> ize
+ *   ational, ation, ator -> ate                    alism, aliti, alli -> al
+ *   fulness -> ful          ousli, ousness -> ous  iveness, iviti -> ive
+ *   biliti, bli -> ble      fulli -> ful           lessli -> less
+ *   ogi -> og, only when preceded by 'l'
+ *   li  -> deleted, only when preceded by a symbol from g_valid_LI
+ * Returns 0 when nothing matches or a condition fails, else 1.
+ */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 3: suffixes looked up backwards in a_5; the match must lie in R1.
+ *   tional -> tion     ational -> ate     alize -> al
+ *   icate, iciti, ical -> ic
+ *   ful, ness -> deleted
+ *   ative -> deleted, only when also in R2
+ * Returns 0 when nothing matches or a condition fails, else 1.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 4: suffixes looked up backwards in a_6; the match must lie in R2.
+ * Every entry except "ion" is simply deleted; "ion" is deleted only when the
+ * symbol before it is 's' or 't'.
+ * Returns 0 when nothing matches or a condition fails, else 1.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 5: final 'e' or 'l', looked up backwards in a_7.
+ *   'e' -> deleted when in R2, or when in R1 and r_shortv() does NOT hold
+ *          for what precedes it
+ *   'l' -> deleted when in R2 and preceded by another 'l'
+ * Returns 0 when nothing matches or a condition fails, else 1.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Second exception list (backward mode): returns 1 only when an entry of a_8
+ * matches at the end of the word and the match reaches back to the backward
+ * limit z->lb, i.e. covers the whole word.  The matched span is recorded in
+ * ket/bra.
+ */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (find_among_b(z, a_8, 8) == 0) /* substring, line 147 */
+        return 0;
+    z->bra = z->c; /* ], line 147 */
+    /* atlimit, line 147 */
+    return (z->c <= z->lb) ? 1 : 0;
+}
+
+/*
+ * First exception list (forward mode): whole words looked up in a_9.  The
+ * match must end exactly at the limit z->l.
+ *   skis -> ski      skies -> sky     dying -> die     lying -> lie
+ *   tying -> tie     idly -> idl      gently -> gentl  ugly -> ugli
+ *   early -> earli   only -> onli     singly -> singl
+ *   andes, atlas, bias, cosmos, howe, news, sky -> left unchanged
+ * Returns 1 when the word was one of the above, 0 otherwise.
+ */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Postlude: undo the prelude's marking.  Returns 0 immediately when Y_found
+ * (z->B[0]) is not set; otherwise scans forward, rewrites every 'Y' to 'y',
+ * restores the cursor to where the last search started and returns 1.
+ */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+/*
+ * Entry point: stem the word held in z, in place.
+ *
+ * If r_exception1() handles the word, nothing else is done.  Otherwise the
+ * word must have at least 3 symbols from the cursor on (the "hop 3" test);
+ * shorter input returns 0 untouched.  Then prelude and mark_regions run
+ * forwards, the direction is switched to backwards (z->lb = z->c,
+ * z->c = z->l), Step_1a runs, and unless r_exception2() matches, Steps 1b,
+ * 1c, 2, 3, 4 and 5 run in that order.  Finally the cursor goes back to
+ * z->lb and postlude runs.  Each step is wrapped in a "do": its return value
+ * is ignored and the cursor is restored afterwards.
+ *
+ * Returns 1, except for the short-word case which returns 0.
+ */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/* Create an environment for this stemmer: no string slots, 2 integers (I[0]/I[1] hold p1/p2) and 1 boolean (B[0] is Y_found). */
+extern struct SN_env * english_create_env(void) { return SN_create_env(0, 2, 1); }
+
+/* Release an environment obtained from english_create_env(); simply forwards to SN_close_env(). */
+extern void english_close_env(struct SN_env * z) { SN_close_env(z); }
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * english_create_env(void); /* returns the result of SN_create_env(0, 2, 1) */
+extern void english_close_env(struct SN_env * z); /* hands z to SN_close_env() */
+
+extern int english_stem(struct SN_env * z); /* stemmer entry point, defined in english_stem.c */
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/*
+ * A symbol buffer is preceded by a two-int header: the int directly
+ * before the data holds the current size, the one before that the
+ * capacity.  HEAD is the byte size of that header.
+ *
+ * Every expansion is fully parenthesised so the macros can be used inside
+ * larger expressions (e.g. "x % HEAD" or "SIZE(p) + 1") without being
+ * re-associated by operator precedence.  CAPACITY(p) and SIZE(p) remain
+ * assignable lvalues.
+ */
+#define HEAD (2*sizeof(int))
+
+#define SIZE(p)        (((int *)(p))[-1])
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    (((int *)(p))[-2])
+
+struct among /* one entry of a string table searched by find_among() / find_among_b() */
+{   int s_size;     /* number of symbols in s */
+    symbol * s;       /* the string this entry matches */
+    int substring_i;/* index of the entry to try next if this one is rejected; negative ends the chain */
+    int result;     /* value returned by the search when this entry is accepted */
+    int (* function)(struct SN_env *); /* optional extra condition; 0 means accept unconditionally */
+};
+
+extern symbol * create_s(void); /* allocate a symbol buffer with its size/capacity header */
+extern void lose_s(symbol * p); /* free a buffer, header included */
+
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max); /* grouping tests: s is a bitmap indexed by (symbol - min); _b variants look at the symbol before the cursor */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+extern int in_range(struct SN_env * z, int min, int max); /* range tests: symbol inside / outside [min, max] */
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+extern int eq_s(struct SN_env * z, int s_size, symbol * s); /* literal match at the cursor; cursor moves past the match on success */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p); /* same, with the length taken from SIZE(p) */
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+extern int find_among(struct SN_env * z, struct among * v, int v_size); /* table lookup; returns the accepted entry's result, or 0 */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+extern symbol * increase_size(symbol * p, int n); /* move p into a larger buffer and free the old one */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s); /* returns the change in length */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s); /* replace the slice z->bra..z->ket */
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z); /* replace the slice with nothing */
+
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s); /* replace bra..ket and shift z->bra / z->ket accordingly */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+extern symbol * slice_to(struct SN_env * z, symbol * p); /* copy the slice into p (possibly reallocated) */
+extern symbol * assign_to(struct SN_env * z, symbol * p); /* copy the first z->l symbols into p (possibly reallocated) */
+
+extern void debug(struct SN_env * z, int number, int line_count); /* print the buffer with cursor/limit markers */
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int russian_stem(struct SN_env * z); /* public entry point; the static r_* routines are the steps it calls */
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+extern struct SN_env * russian_create_env(void); /* environment constructor / destructor for this stemmer */
+extern void russian_close_env(struct SN_env * z);
+
+static symbol s_0_0[3] = { 215, 219, 201 }; /* s_0_*: ending strings for a_0, as raw symbol codes. NOTE(review): the codes look like KOI8-R Cyrillic -- confirm */
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] = /* endings searched backwards by r_perfective_gerund(); entry = { length, string, substring_i, result, function } */
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0}, /* result 1: deleted only when preceded by symbol 193 or 209 */
+/*  1 */ { 4, s_0_1, 0, 2, 0}, /* result 2: deleted unconditionally */
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+static symbol s_1_0[2] = { 192, 192 }; /* s_1_*: ending strings for a_1 */
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] = /* endings searched backwards by r_adjective(); every entry has result 1, which deletes the match */
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+static symbol s_2_0[2] = { 197, 205 }; /* s_2_*: ending strings for a_2 */
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] = /* optional second ending tried by r_adjectival() after r_adjective() succeeded */
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0}, /* result 1: deleted only when preceded by symbol 193 or 209 */
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0}, /* result 2: deleted unconditionally */
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+static symbol s_3_0[2] = { 211, 209 }; /* s_3_*: ending strings for a_3 */
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] = /* endings searched backwards by r_reflexive(); result 1 deletes the match */
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+static symbol s_4_0[1] = { 192 }; /* s_4_*: ending strings for a_4 */
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] = /* endings searched backwards by r_verb(); result 1 needs a preceding symbol 193 or 209, result 2 deletes unconditionally */
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+static symbol s_5_0[1] = { 192 }; /* s_5_*: ending strings for a_5 */
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] = /* endings searched backwards by r_noun(); every entry has result 1, which deletes the match */
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+static symbol s_6_0[3] = { 207, 211, 212 }; /* s_6_*: ending strings for a_6 */
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] = /* endings searched backwards by r_derivational(); deleted only if r_R2() also succeeds */
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+static symbol s_7_0[4] = { 197, 202, 219, 197 }; /* s_7_*: ending strings for a_7 */
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] = /* endings searched backwards by r_tidy_up() */
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0}, /* result 1: delete, then delete one symbol 206 if two of them now end the word */
+/*  1 */ { 1, s_7_1, -1, 2, 0}, /* result 2: delete only when preceded by another symbol 206 */
+/*  2 */ { 1, s_7_2, -1, 3, 0}, /* result 3: delete unconditionally */
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+static unsigned char g_v[] = { 35, 130, 34, 18 }; /* bitmap passed to in_grouping()/out_grouping() with symbol range 192..220 in r_mark_regions() */
+
+static symbol s_0[] = { 193 }; /* s_0 / s_1: alternatives tested in r_perfective_gerund() */
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 }; /* s_2 / s_3: alternatives tested in r_adjectival() */
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 }; /* s_4 / s_5: alternatives tested in r_verb() */
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 }; /* s_6 / s_7 / s_8: single-symbol tests in r_tidy_up() */
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 }; /* trailing symbol optionally removed by russian_stem() */
+
+/*
+ * Compute the two region marks used by the suffix rules.
+ *
+ * I[0] (pV) and I[1] (p2) both default to the end of the word (z->l).
+ * Scanning forward from the cursor with the g_v grouping over the symbol
+ * range 192..220:
+ *   - I[0] becomes the position just after the first symbol in g_v;
+ *   - I[1] becomes the position reached after additionally passing a
+ *     symbol outside g_v, one inside it, and one more outside it.
+ * If the word ends during any scan, the marks set so far are kept.
+ * The cursor is always restored and the routine always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    int start = z->c;
+
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+
+    /* gopast a g_v symbol */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto done;
+        z->c++;
+    }
+    z->I[0] = z->c; /* setmark pV */
+
+    /* gopast a non-g_v symbol */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto done;
+        z->c++;
+    }
+
+    /* gopast a g_v symbol */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto done;
+        z->c++;
+    }
+
+    /* gopast a non-g_v symbol */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto done;
+        z->c++;
+    }
+    z->I[1] = z->c; /* setmark p2 */
+
+done:
+    z->c = start;
+    return 1;
+}
+
+/* Region test: returns 1 when the cursor is at or beyond the p2 mark I[1], else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/*
+ * Remove an ending listed in a_0 from before the cursor.
+ *
+ * Entries with result 1 are removed only when the symbol in front of the
+ * ending is s_0 (193) or s_1 (209); that symbol itself is kept because the
+ * slice is closed (z->bra) before it is tested.  Entries with result 2 are
+ * removed unconditionally.
+ *
+ * Returns 1 if something was deleted, 0 if no rule applied.
+ */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;                          /* [ */
+    among_var = find_among_b(z, a_0, 9);    /* substring */
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* ] */
+
+    if (among_var == 1) {
+        int mark = z->l - z->c;             /* or */
+        if (!eq_s_b(z, 1, s_0)) {
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z);                       /* delete */
+    } else if (among_var == 2) {
+        slice_del(z);                       /* delete */
+    }
+    return 1;
+}
+
+/*
+ * Remove an ending listed in a_1 from before the cursor.
+ * Returns 1 if an ending matched (and was deleted), 0 otherwise.
+ */
+static int r_adjective(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;                          /* [ */
+    among_var = find_among_b(z, a_1, 26);   /* substring */
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* ] */
+
+    if (among_var == 1) slice_del(z);       /* delete */
+    return 1;
+}
+
+/*
+ * Remove an adjective ending (r_adjective) and then, optionally, one
+ * further ending from a_2.
+ *
+ * The second removal is a "try": if it does not apply the cursor must go
+ * back to where it stood when the try began.  a_2 entries with result 1
+ * are removed only when preceded by s_2 (193) or s_3 (209); entries with
+ * result 2 are removed unconditionally.
+ *
+ * The two cursor marks carry distinct names.  Previously both were called
+ * "m", so the inner one shadowed the outer and the failure path of the
+ * s_2/s_3 test restored the cursor to the inner (post-match) position
+ * instead of the start of the try.
+ *
+ * Returns 0 if r_adjective() fails, 1 otherwise.
+ */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m_try = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m_try; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m_try; goto lab0; }
+            case 1:
+                {   int m_or = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m_or;
+                    /* neither alternative matched: abandon the whole try */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_try; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/*
+ * Remove an ending listed in a_3 from before the cursor.
+ * Returns 1 if an ending matched (and was deleted), 0 otherwise.
+ */
+static int r_reflexive(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;                          /* [ */
+    among_var = find_among_b(z, a_3, 2);    /* substring */
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* ] */
+
+    if (among_var == 1) slice_del(z);       /* delete */
+    return 1;
+}
+
+/*
+ * Remove an ending listed in a_4 from before the cursor.
+ *
+ * Entries with result 1 are removed only when the symbol in front of the
+ * ending is s_4 (193) or s_5 (209); entries with result 2 are removed
+ * unconditionally.
+ *
+ * Returns 1 if something was deleted, 0 if no rule applied.
+ */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;                          /* [ */
+    among_var = find_among_b(z, a_4, 46);   /* substring */
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* ] */
+
+    if (among_var == 1) {
+        int mark = z->l - z->c;             /* or */
+        if (!eq_s_b(z, 1, s_4)) {
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z);                       /* delete */
+    } else if (among_var == 2) {
+        slice_del(z);                       /* delete */
+    }
+    return 1;
+}
+
+/*
+ * Remove an ending listed in a_5 from before the cursor.
+ * Returns 1 if an ending matched (and was deleted), 0 otherwise.
+ */
+static int r_noun(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;                          /* [ */
+    among_var = find_among_b(z, a_5, 36);   /* substring */
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* ] */
+
+    if (among_var == 1) slice_del(z);       /* delete */
+    return 1;
+}
+
+/*
+ * Remove an ending listed in a_6, provided the ending starts at or after
+ * the p2 mark (checked with r_R2 once the slice has been closed).
+ * Returns 1 if the ending was deleted, 0 otherwise.
+ */
+static int r_derivational(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;                          /* [ */
+    among_var = find_among_b(z, a_6, 2);    /* substring */
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* ] */
+
+    if (!r_R2(z)) return 0;                 /* call R2 */
+    if (among_var == 1) slice_del(z);       /* delete */
+    return 1;
+}
+
+/*
+ * Final clean-up driven by table a_7.
+ *
+ *   result 1: delete the ending; then, if the word now ends in two
+ *             symbols 206, delete the last of them (returns 0 if it
+ *             does not, with the first deletion already done);
+ *   result 2: the match is a single 206 - delete it only when another
+ *             206 precedes it;
+ *   result 3: delete the ending.
+ *
+ * Returns 1 when the selected rule ran to completion, 0 otherwise.
+ */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;                          /* [ */
+    among_var = find_among_b(z, a_7, 4);    /* substring */
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* ] */
+
+    if (among_var == 1) {
+        slice_del(z);                       /* delete */
+        z->ket = z->c;                      /* [ */
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c;                      /* ] */
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z);                       /* delete */
+    } else if (among_var == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z);                       /* delete */
+    } else if (among_var == 3) {
+        slice_del(z);                       /* delete */
+    }
+    return 1;
+}
+
+/*
+ * Stem the word held in z.
+ *
+ * Steps:
+ *   1. r_mark_regions() sets the marks I[0] and I[1].
+ *   2. The word is processed backwards from its end, with the backward
+ *      limit temporarily moved to I[0] so no rule can read left of it.
+ *   3. Main removal: perfective gerund; failing that, an optional
+ *      reflexive ending followed by the first that applies of
+ *      adjectival, verb, noun.
+ *   4. An optional trailing s_9 symbol (201) is deleted.
+ *   5. r_derivational() and r_tidy_up() are each given one attempt.
+ *
+ * Cursor positions are remembered as distances from the end of the word
+ * (z->l - z->c) because deletions shorten the word.
+ *
+ * Returns 1, or 0 if the end of the word lies before I[0].
+ */
+extern int russian_stem(struct SN_env * z) {
+    int saved_lb;
+    int from_end;
+
+    /* do mark_regions: the cursor is restored whatever the outcome */
+    {   int start = z->c;
+        (void) r_mark_regions(z);
+        z->c = start;
+    }
+
+    /* backwards */
+    z->lb = z->c;
+    z->c = z->l;
+
+    /* setlimit tomark pV */
+    from_end = z->l - z->c;
+    if (z->c < z->I[0]) return 0;
+    z->c = z->I[0];
+    saved_lb = z->lb;
+    z->lb = z->c;
+    z->c = z->l - from_end;
+
+    /* do: main ending removal */
+    {   int do_mark = z->l - z->c;
+        if (!r_perfective_gerund(z)) {
+            z->c = z->l - do_mark;
+            /* try reflexive */
+            {   int try_mark = z->l - z->c;
+                if (!r_reflexive(z)) z->c = z->l - try_mark;
+            }
+            /* adjectival or verb or noun */
+            {   int or_mark = z->l - z->c;
+                if (!r_adjectival(z)) {
+                    z->c = z->l - or_mark;
+                    if (!r_verb(z)) {
+                        z->c = z->l - or_mark;
+                        (void) r_noun(z);
+                    }
+                }
+            }
+        }
+        z->c = z->l - do_mark;
+    }
+
+    /* try: delete a trailing s_9 symbol */
+    {   int try_mark = z->l - z->c;
+        z->ket = z->c;                      /* [ */
+        if (eq_s_b(z, 1, s_9)) {
+            z->bra = z->c;                  /* ] */
+            slice_del(z);                   /* delete */
+        } else {
+            z->c = z->l - try_mark;
+        }
+    }
+
+    /* do derivational */
+    {   int do_mark = z->l - z->c;
+        (void) r_derivational(z);
+        z->c = z->l - do_mark;
+    }
+
+    /* do tidy_up */
+    {   int do_mark = z->l - z->c;
+        (void) r_tidy_up(z);
+        z->c = z->l - do_mark;
+    }
+
+    /* leave the limited region and put the cursor on the old limit */
+    z->lb = saved_lb;
+    z->c = z->lb;
+    return 1;
+}
+
+/*
+ * Build the environment used by russian_stem().
+ * Simply forwards to SN_create_env(0, 2, 0); the stemmer uses the two
+ * integer slots I[0] and I[1].  The other two counts are presumably the
+ * numbers of string and boolean slots -- TODO confirm against api.c.
+ */
+extern struct SN_env * russian_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 0);
+    return env;
+}
+
+/* Dispose of an environment obtained from russian_create_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * russian_create_env(void); /* returns the result of SN_create_env(0, 2, 0) */
+extern void russian_close_env(struct SN_env * z); /* hands z to SN_close_env() */
+
+extern int russian_stem(struct SN_env * z); /* stemmer entry point, defined in russian_stem.c */
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+#define unless(C) if(!(C))
+
+#define CREATE_SIZE 1
+
+/*
+ * Allocate a new symbol buffer.
+ *
+ * The allocation holds a HEAD-byte header followed by CREATE_SIZE + 1
+ * symbols; the returned pointer addresses the first symbol, and both the
+ * capacity and the size recorded in the header are set to CREATE_SIZE.
+ *
+ * Returns 0 if the allocation fails.  Previously the failed malloc()
+ * result was offset and written through, which is undefined behaviour.
+ * Release the buffer with lose_s().
+ */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+
+    if (mem == 0) return 0;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/*
+ * Free a symbol buffer, including the header stored in front of p.
+ * A null p is ignored, mirroring free(); without the guard the header
+ * offset would turn it into an invalid address.
+ */
+extern void lose_s(symbol * p)
+{
+    if (p == 0) return;
+    free((char *) p - HEAD);
+}
+
+/*
+ * Forward grouping test.  Succeeds when the symbol at the cursor lies in
+ * [min, max] and bit (symbol - min) is set in the bitmap s; the cursor
+ * then advances by one.  Returns 0 without moving when the test fails or
+ * the cursor is at the limit z->l.
+ */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward grouping test.  Succeeds when the symbol just before the cursor
+ * lies in [min, max] and bit (symbol - min) is set in the bitmap s; the
+ * cursor then retreats by one.  Returns 0 without moving when the test
+ * fails or the cursor is at the backward limit z->lb.
+ */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/*
+ * Forward "not in grouping" test.  Succeeds when the symbol at the cursor
+ * is outside [min, max] or its bit is clear in the bitmap s; the cursor
+ * then advances by one.  Returns 0 without moving when the symbol is in
+ * the grouping or the cursor is at the limit z->l.
+ */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max && ch >= min)
+    {   int bit = ch - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward "not in grouping" test.  Succeeds when the symbol just before
+ * the cursor is outside [min, max] or its bit is clear in the bitmap s;
+ * the cursor then retreats by one.  Returns 0 without moving when the
+ * symbol is in the grouping or the cursor is at the backward limit z->lb.
+ */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max && ch >= min)
+    {   int bit = ch - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/*
+ * Forward range test: if the symbol at the cursor is within [min, max],
+ * step over it and return 1; otherwise (or at the limit z->l) return 0.
+ */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch < min || ch > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward range test: if the symbol before the cursor is within
+ * [min, max], step back over it and return 1; otherwise (or at the
+ * backward limit z->lb) return 0.
+ */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch < min || ch > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/*
+ * Forward "outside range" test: if the symbol at the cursor is not within
+ * [min, max], step over it and return 1; otherwise (or at the limit z->l)
+ * return 0.
+ */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch >= min && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward "outside range" test: if the symbol before the cursor is not
+ * within [min, max], step back over it and return 1; otherwise (or at the
+ * backward limit z->lb) return 0.
+ */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch >= min && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/*
+ * Forward literal match: if the s_size symbols at s appear at the cursor
+ * (and fit before the limit z->l), move the cursor past them and return 1;
+ * otherwise leave the cursor alone and return 0.
+ */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->l - z->c < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/*
+ * Backward literal match: if the s_size symbols at s end exactly at the
+ * cursor (and fit after the backward limit z->lb), move the cursor to
+ * their start and return 1; otherwise leave the cursor alone and return 0.
+ */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->c - z->lb < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward match against a symbol buffer p whose length is read from its header. */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s(z, len, p);
+}
+
+/* Backward match against a symbol buffer p whose length is read from its header. */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s_b(z, len, p);
+}
+
+/*
+ * Look up the text at the cursor in the table v (v_size entries).
+ *
+ * Phase 1 bisects the table: each probe compares the entry's string with
+ * the text, resuming after the prefix already known to match both ends of
+ * the current interval, and running out of text counts as "text is
+ * smaller".  Phase 2 starts from the entry the bisection settled on and
+ * follows the substring_i links until an entry is found that was matched
+ * in full and whose optional function (if any) returns non-zero.
+ *
+ * On success the cursor is left just past the matched string and the
+ * entry's result is returned.  Returns 0 when no entry qualifies.
+ */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int lo = 0;
+    int hi = v_size;
+
+    int start = z->c;
+    int limit = z->l;
+    symbol * text = z->p + start;
+
+    struct among * entry;
+
+    int matched_lo = 0;   /* symbols known to match at entry lo */
+    int matched_hi = 0;   /* symbols known to match at entry hi */
+
+    int first_key_inspected = 0;
+
+    for (;;)
+    {   int mid = lo + ((hi - lo) >> 1);
+        int diff = 0;
+        int common = matched_lo < matched_hi ? matched_lo : matched_hi;
+
+        entry = v + mid;
+        while (common < entry->s_size)
+        {   if (start + common == limit) { diff = -1; break; }
+            diff = text[common] - entry->s[common];
+            if (diff != 0) break;
+            common++;
+        }
+
+        if (diff < 0) { hi = mid; matched_hi = common; }
+        else          { lo = mid; matched_lo = common; }
+
+        if (hi - lo <= 1)
+        {   if (lo > 0) break;      /* entry 0 has already been probed */
+            if (hi == lo) break;    /* single-entry table */
+
+            /* lo is still 0 and entry 0 may not have been probed yet:
+               allow exactly one more round so that it is. */
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+
+    for (;;)
+    {   entry = v + lo;
+        if (matched_lo >= entry->s_size)
+        {   z->c = start + entry->s_size;
+            if (entry->function == 0) return entry->result;
+            {   int accepted = entry->function(z);
+                z->c = start + entry->s_size;
+                if (accepted) return entry->result;
+            }
+        }
+        lo = entry->substring_i;
+        if (lo < 0) return 0;
+    }
+}
+
+/*
+ * Backward counterpart of find_among(): look up the text that ends at the
+ * cursor in the table v (v_size entries).
+ *
+ * Strings are compared from their last symbol towards their first, and
+ * reaching the backward limit z->lb counts as "text is smaller".  After
+ * the bisection the substring_i links are followed until an entry is found
+ * that was matched in full and whose optional function (if any) returns
+ * non-zero.
+ *
+ * On success the cursor is left at the start of the matched string and the
+ * entry's result is returned.  Returns 0 when no entry qualifies.
+ */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int lo = 0;
+    int hi = v_size;
+
+    int start = z->c;
+    int limit = z->lb;
+    symbol * last = z->p + start - 1;
+
+    struct among * entry;
+
+    int matched_lo = 0;   /* symbols known to match at entry lo */
+    int matched_hi = 0;   /* symbols known to match at entry hi */
+
+    int first_key_inspected = 0;
+
+    for (;;)
+    {   int mid = lo + ((hi - lo) >> 1);
+        int diff = 0;
+        int common = matched_lo < matched_hi ? matched_lo : matched_hi;
+
+        entry = v + mid;
+        while (common < entry->s_size)
+        {   if (start - common == limit) { diff = -1; break; }
+            diff = last[- common] - entry->s[entry->s_size - 1 - common];
+            if (diff != 0) break;
+            common++;
+        }
+
+        if (diff < 0) { hi = mid; matched_hi = common; }
+        else          { lo = mid; matched_lo = common; }
+
+        if (hi - lo <= 1)
+        {   if (lo > 0) break;      /* entry 0 has already been probed */
+            if (hi == lo) break;    /* single-entry table */
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+
+    for (;;)
+    {   entry = v + lo;
+        if (matched_lo >= entry->s_size)
+        {   z->c = start - entry->s_size;
+            if (entry->function == 0) return entry->result;
+            {   int accepted = entry->function(z);
+                z->c = start - entry->s_size;
+                if (accepted) return entry->result;
+            }
+        }
+        lo = entry->substring_i;
+        if (lo < 0) return 0;
+    }
+}
+
+
+/*
+ * Move the buffer p into a fresh allocation with capacity n + 20.
+ *
+ * CAPACITY(p) symbols are copied across and the old buffer is freed.  Only
+ * the capacity field of the new header is filled in here; the size field
+ * is left for the caller to set.
+ *
+ * NOTE(review): the malloc() result is used without a check.
+ */
+extern symbol * increase_size(symbol * p, int n)
+{
+    int new_capacity = n + 20;
+    char * raw = (char *) malloc(HEAD + (new_capacity + 1) * sizeof(symbol));
+    symbol * grown = (symbol *) (HEAD + raw);
+
+    CAPACITY(grown) = new_capacity;
+    memmove(grown, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return grown;
+}
+
+/*
+ * Replace the symbols of z->p in [c_bra, c_ket) by the s_size symbols at s.
+ *
+ * When the length changes, the buffer is grown if necessary, the tail
+ * after c_ket is shifted, and the recorded size and the limit z->l are
+ * adjusted.  The cursor moves with the tail if it was at or after c_ket,
+ * and is pulled back to c_bra if it was inside the replaced span.
+ *
+ * Returns the change in length (new minus old).
+ */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{
+    int old_len = SIZE(z->p);
+    int delta = s_size - (c_ket - c_bra);
+
+    if (delta != 0)
+    {   int new_len = delta + old_len;
+
+        if (new_len > CAPACITY(z->p)) z->p = increase_size(z->p, new_len);
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        if (z->c >= c_ket)
+            z->c += delta;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/*
+ * Sanity-check the slice markers: 0 <= bra <= ket <= l <= SIZE(z->p).
+ * If the chain does not hold, report on stderr, dump the buffer with
+ * debug() and terminate the process with exit status 1.
+ */
+static void slice_check(struct SN_env * z)
+{
+    int ok = 0 <= z->bra
+          && z->bra <= z->ket
+          && z->ket <= z->l
+          && z->l <= SIZE(z->p);
+
+    if (ok) return;
+
+    fprintf(stderr, "faulty slice operation:\n");
+    debug(z, -1, 0);
+    exit(1);
+}
+
+/* Replace the current slice (z->bra .. z->ket) by the s_size symbols at s, after validating the markers. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the contents of the symbol buffer p (length from its header). */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    slice_from_s(z, len, p);
+}
+
+/* Delete the current slice (z->bra .. z->ket): replace it by zero symbols after validating the markers. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, 0, 0);
+}
+
+/*
+ * Replace [bra, ket) by the s_size symbols at s and keep the slice markers
+ * consistent: z->bra and z->ket are each shifted by the change in length
+ * when they lie at or after bra.
+ */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* Same as insert_s(), taking the replacement text and its length from the symbol buffer p. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/*
+ * Copy the current slice (z->bra .. z->ket) into the buffer p, growing it
+ * first if its capacity is too small, and record the new size.
+ * Returns the buffer holding the copy, which may differ from p.
+ */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int len;
+
+    slice_check(z);
+    len = z->ket - z->bra;
+    if (CAPACITY(p) < len) p = increase_size(p, len);
+    memmove(p, z->p + z->bra, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/*
+ * Copy the first z->l symbols of z->p into the buffer p, growing it first
+ * if its capacity is too small, and record the new size.
+ * Returns the buffer holding the copy, which may differ from p.
+ */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int len = z->l;
+
+    if (CAPACITY(p) < len)
+        p = increase_size(p, len);
+    memmove(p, z->p, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/*
+ * Print the contents of z->p on stdout between single quotes, with
+ * markers inserted at the positions of the environment's indices:
+ *   '{' backward limit (lb)   '[' bra   '|' cursor   ']' ket   '}' limit (l)
+ * Zero symbols are shown as '#'.  When number is non-negative the line is
+ * prefixed with number, line_count and the buffer size.
+ */
+extern void debug(struct SN_env * z, int number, int line_count)
+{
+    int pos;
+    int limit = SIZE(z->p);
+
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+
+    /* pos runs one past the last symbol so markers at the very end show too */
+    for (pos = 0; pos <= limit; pos++)
+    {   if (z->lb == pos) printf("{");
+        if (z->bra == pos) printf("[");
+        if (z->c == pos) printf("|");
+        if (z->ket == pos) printf("]");
+        if (z->l == pos) printf("}");
+        if (pos == limit) continue;
+        {   int ch = z->p[pos];
+            if (ch == 0) ch = '#';
+            printf("%c", ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Convert a NUL-terminated string to lower case in place, one byte at a
+ * time with tolower().  Returns the same pointer it was given.
+ */
+char*
+lowerstr(char *str) {
+   char *cur;
+
+   for (cur = str; *cur != '\0'; cur++)
+       *cur = tolower(*(unsigned char*)cur);
+
+   return str;
+}
+
+/*
+ * Release everything owned by a stop list: each word, then the pointer
+ * array, and finally zero the whole StopList structure.
+ *
+ * The array is freed exactly once, after the words; freeing it inside
+ * the per-word loop would release it repeatedly and keep reading from
+ * freed memory.
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       /* check the counter first so we never look past the last entry */
+       while( s->len >0 && *ptr ) {
+           free(*ptr);
+           ptr++; s->len--;
+       }
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/* Free the first n words of a partially built list and the array itself. */
+static void
+discardwords(char **words, int n) {
+   int i;
+
+   if ( !words )
+       return;
+   for(i=0;i<n;i++)
+       free(words[i]);
+   free(words);
+}
+
+/*
+ * Read a stop-word file, one word per line, into s.
+ *
+ * in  - text value holding the file name; NULL or empty yields an
+ *       empty list (s->stop == NULL, s->len == 0).
+ * s   - list to fill; s->wordop, when set, is applied to every word.
+ *
+ * Empty lines are skipped.  Words and the pointer array are allocated
+ * with strdup()/realloc().  Failure to open the file or to allocate
+ * memory is reported with elog(ERROR); on allocation failure the words
+ * gathered so far are released first.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t  buflen=strlen(buf);
+
+           /*
+            * Remove the line terminator only when it is really there: a
+            * final line without '\n' must keep its last character.
+            */
+           if ( buflen>0 && buf[buflen-1]=='\n' )
+               buf[buflen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /* the words live in the local array, not yet in *s */
+                   discardwords(stop, s->len);
+                   s->stop=NULL;
+                   s->len=0;
+                   freestoplist(s);
+                   fclose(hin); 
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+    
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               discardwords(stop, s->len);
+               s->stop=NULL;
+               s->len=0;
+               freestoplist(s);
+               fclose(hin); 
+               elog(ERROR,"Not enough memory");
+           }
+           if ( s->wordop ) 
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++; 
+       }
+       fclose(hin);
+       pfree(filename); 
+   }
+   s->stop=stop;
+} 
+
+/* qsort()/bsearch() comparator: a and b point at char* elements, which
+ * are compared with strcmp(). */
+static int
+comparestr(const void *a, const void *b) {
+   char *left = *(char**)a;
+   char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/* Sort the words of a stop list with comparestr(); an empty or
+ * unallocated list is left untouched. */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Report whether key is in the stop list, using bsearch() with
+ * comparestr().  When s->wordop is set it is applied to key first,
+ * also for an empty list.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   void *hit;
+
+   if ( s->wordop ) 
+       key=(*(s->wordop))(key);
+
+   if ( !s->stop || s->len<=0 )
+       return false;
+
+   hit = bsearch(&key, s->stop, s->len, sizeof(char*), comparestr);
+   return ( hit != NULL ) ? true : false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <locale.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+/*
+ * Saved SPI plans.  Each starts out NULL and is filled in by
+ * SPI_saveplan(SPI_prepare(...)) the first time the owning function runs:
+ *   plan_getcfg_bylocale - get_currcfg(): config oid for a locale
+ *   plan_getcfg          - init_cfg():    parser name of a config
+ *   plan_getmap          - init_cfg():    token type -> dictionary names
+ *   plan_name2id         - name2id_cfg(): config oid for a config name
+ */
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+/* current configuration oid; 0 until get_currcfg() or set_curcfg() sets it */
+static Oid current_cfg_id=0;
+
+/*
+ * Load the text-search configuration with oid `id` into *cfg.
+ *
+ * *cfg is zeroed first.  The parser name comes from pg_ts_cfg and the
+ * token-type -> dictionary-name map from pg_ts_cfgmap, both through saved
+ * SPI plans that are prepared on first use.  After SPI is finished the
+ * names are turned into ids with name2id_prs() and name2id_dict().
+ *
+ * cfg->map and every map[].dict_id array are malloc'ed.  Problems are
+ * reported with ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       /* the copy must outlive SPI_finish(), so make it in TopMemoryContext */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second query takes (parser name, config oid) */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /*
+        * Rows arrive ordered by tokid descending, so the first row carries
+        * the largest token id and fixes the size of the map.
+        */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull now describes the dict_name column: no dictionaries */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* keep copies of the dictionary names until SPI is finished */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every stored dictionary name by the dictionary's oid */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Per-backend cache of loaded configurations.
+ *   last_cfg    - entry returned by the most recent findcfg() lookup
+ *   len         - number of entries in use in list
+ *   reallen     - number of entries allocated for list
+ *   list        - realloc'ed array, kept sorted by id (see findcfg)
+ *   name2id_map - config name -> oid map used by name2id_cfg()
+ */
+typedef struct {
+   TSCfgInfo   *last_cfg;
+   int     len;
+   int     reallen;
+   TSCfgInfo   *list;
+   SNMap       name2id_map;
+} CFGList;
+
+/* the single cache instance, initially empty */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name -> id map, every
+ * configuration's dictionary arrays and map, and the list itself.
+ * CList is left zeroed.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ * Oid is unsigned, so the ids are compared explicitly: a subtraction
+ * squeezed into int gives the wrong sign once two ids are more than
+ * 2^31 apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((TSCfgInfo*)a)->id;
+   Oid idb = ((TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached configuration with the given oid, loading it with
+ * init_cfg() on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search of
+ * the sorted cache, then a fresh load appended to the cache (which is
+ * grown by doubling and re-sorted afterwards).
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo *newcfg;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+
+   /*
+    * Do not publish the new slot through last_cfg before it is complete:
+    * if init_cfg() bails out with an error the slot is only partly
+    * filled, and the fast path above must not hand it out later.
+    */
+   CList.last_cfg=NULL;
+   newcfg=&(CList.list[CList.len]);
+   init_cfg(id, newcfg);
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return findcfg(id); /* qsort changed order!! */
+}
+
+
+/*
+ * Translate a configuration name into its oid.
+ *
+ * The name -> id map in CList is consulted first; on a miss the oid is
+ * read from pg_ts_cfg through a saved SPI plan and remembered in the
+ * map.  An unknown name or a null oid is reported with elog(ERROR).
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid cfgid;
+
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid ) 
+       return cfgid;
+   
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL ) 
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed > 0 ) {
+       cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull ) 
+           elog(ERROR, "Null id for tsearch config");
+   } else 
+       elog(ERROR, "No tsearch config");
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Split buf (buflen bytes) into tokens with the configuration's parser
+ * and append the normalized lexemes to prs->words.
+ *
+ * For each token the dictionaries mapped to its type are tried in order;
+ * the first one that returns a non-NULL result supplies the lexemes and
+ * ends the search for that token.  prs->pos is advanced once per token
+ * handled this way.  Token types outside the map are skipped.  A token
+ * of MAXSTRLEN bytes or more raises an error.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an integer length, so it travels as an int4 datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               /* the lexeme string itself is handed over to prs->words */
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append a zero-initialized headline word to prs->words, doubling the
+ * array while it is full.  The word keeps its own palloc'ed copy of the
+ * buflen bytes at buf, plus the token type.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD *slot;
+
+   while (prs->lenwords <= prs->curwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) ); 
+   slot->type = (uint8)type;
+   slot->len = buflen; 
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+
+   prs->curwords++;    
+}
+
+/*
+ * Link the most recently added headline word to every VAL item of the
+ * query whose operand equals buf (buflen bytes).
+ *
+ * The first match is stored in the word itself.  Every further match
+ * appends a copy of the word, marked `repeated`, that points at the
+ * additional item.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* make room for one extra word per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc() may have moved the array, and
+    * a pointer obtained earlier would refer to freed memory.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Tokenize buf (buflen bytes) for headline generation.
+ *
+ * Every token is appended to prs->words with hladdword().  Tokens whose
+ * type is covered by the configuration map are then normalized by the
+ * first dictionary that returns a non-NULL result, and each lexeme is
+ * matched against the query with hlfinditem().  A token of MAXSTRLEN
+ * bytes or more raises an error.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an integer length, so it travels as an int4 datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           /* the lexemes are only compared, so each is freed right away */
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from the words collected in prs.
+ *
+ * Only words flagged `in`, and neither `skip` nor `repeated`, are
+ * written: a single blank for `replace` words, otherwise the word text,
+ * wrapped in startsel/stopsel when it is `selected`.  The text of every
+ * non-repeated word is pfree'd on the way.  Returns a palloc'ed text.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int     buflen=128;
+   text    *out=(text*)palloc( buflen );
+   char    *cur=((char*)out) + VARHDRSZ;
+   int     i;
+
+   for(i=0; i < prs->curwords; i++) {
+       HLWORD  *wrd=&(prs->words[i]);
+
+       /* double the buffer until the word and both markers fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (cur - ((char*)out)) >= buflen ) {
+           int used = cur - ((char*)out);
+
+           buflen*= 2;
+           out = (text *) repalloc(out, buflen);
+           cur=((char*)out) + used;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace ) {
+               *cur++ = ' ';
+           } else {
+               if (wrd->selected) {
+                   memcpy(cur,prs->startsel,prs->startsellen);
+                   cur+=prs->startsellen;
+               }
+               memcpy(cur,wrd->word,wrd->len);
+               cur+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(cur,prs->stopsel,prs->stopsellen);
+                   cur+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries share the text of the word they were copied from */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+   }
+
+   VARATT_SIZEP(out)=cur - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the oid of the current configuration.
+ *
+ * A previously chosen id is returned directly.  Otherwise pg_ts_cfg is
+ * searched for a row whose locale equals the process LC_CTYPE locale;
+ * the result is cached in current_cfg_id.  No matching row is an error.
+ */
+int  
+get_currcfg(void) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1];
+   const char *curlocale;
+   bool isnull;
+   int rc;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( plan_getcfg_bylocale == NULL ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( plan_getcfg_bylocale == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* setlocale() with a NULL locale only queries the current setting */
+   curlocale = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)curlocale) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed > 0 )
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid current.
+ * findcfg() is called first so the configuration is loaded before the
+ * id is stored; its result is not used here.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name current.
+ * The name is resolved with name2id_cfg() and handed to set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Oid cfgid=name2id_cfg(name);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum( cfgid ) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}       
+
+/* SQL-callable: return the oid reported by get_currcfg(). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   int cfgid=get_currcfg();
+
+   PG_RETURN_OID( cfgid ); 
+}
+
+/*
+ * SQL-callable: emit the notice "TSearch cache cleaned".
+ * NOTE(review): no cache is released in this function itself;
+ * presumably ts_error() performs the actual reset before reporting -
+ * confirm against its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/*
+ * Dictionaries assigned to one token type.
+ *   len     - number of entries in dict_id
+ *   dict_id - malloc'ed array; init_cfg() leaves dictionary oids in it
+ */
+typedef struct {
+   int len;
+   Datum   *dict_id;
+} ListDictionary;
+
+/*
+ * One loaded text-search configuration, as filled in by init_cfg().
+ *   id     - oid of the configuration
+ *   prs_id - id of the configuration's parser
+ *   len    - number of entries in map
+ *   map    - dictionaries per token type, indexed by token type id
+ */
+typedef struct {
+   Oid id;
+   Oid prs_id;
+   int len;
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/*
+ * One normalized lexeme produced by parsetext_v2().
+ *   len     - length of word in bytes
+ *   pos.pos - position of the lexeme in the text (single-position form)
+ *   pos.apos- alternative pointer form of the position
+ *             (NOTE(review): not used in ts_cfg.c - confirm its layout
+ *             where it is filled)
+ *   word    - the lexeme text
+ *   alen    - set to 0 by parsetext_v2()
+ */
+typedef struct {
+        uint16          len;
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        char       *word;
+   uint32  alen;
+}       WORD;
+   
+/*
+ * Growing array of lexemes built by parsetext_v2().
+ *   words    - the array, enlarged with repalloc() by doubling
+ *   lenwords - number of entries allocated
+ *   curwords - number of entries in use
+ *   pos      - running position counter, advanced per recognized token
+ */
+typedef struct {
+        WORD       *words;
+        int4            lenwords;
+        int4            curwords;
+   int4        pos;
+}       PRSTEXT;
+
+/*
+ * One token of a text being turned into a headline.
+ *   len      - length of word in bytes
+ *   selected - genhl() wraps the word in the start/stop markers
+ *   in       - genhl() emits the word only when this is set
+ *   skip     - genhl() omits the word
+ *   replace  - genhl() emits a single blank instead of the word
+ *   repeated - extra copy made by hlfinditem() for a further query
+ *              match; never emitted and its word is not freed
+ *   type     - token type reported by the parser
+ *   word     - palloc'ed copy of the token text
+ *   item     - query item matched by this word, if any
+ */
+typedef struct {
+        uint16    len;
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   uint8   type;
+        char      *word;
+   ITEM      *item;
+}       HLWORD;
+   
+/*
+ * State for headline generation.
+ *   words       - array of tokens, enlarged with repalloc() by doubling
+ *   lenwords    - number of entries allocated
+ *   curwords    - number of entries in use
+ *   startsel    - text written before a selected word (startsellen bytes)
+ *   stopsel     - text written after a selected word (stopsellen bytes)
+ */
+typedef struct {
+        HLWORD       *words;
+        int4            lenwords;
+        int4            curwords;
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+/*
+ * tsstat_in: input function for the tsstat type.  The textual argument is
+ * never examined; the result is always an empty tsstat (header only, zero
+ * entries).
+ */
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+   PG_RETURN_POINTER(empty);
+}
+
+/*
+ * tsstat_out: output function for the tsstat type.  Not implemented: it
+ * always raises an ERROR.
+ */
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   /* follows the elog(ERROR) call; supplies a return value for the compiler */
+   PG_RETURN_NULL();
+}
+
+/*
+ * SEI_realloc: grow (or create) an array of WordEntry pointers.
+ *
+ * in  - current array, or NULL if none has been allocated yet.
+ * len - in/out capacity in elements: set to 8 on first allocation,
+ *       doubled on every later call.
+ *
+ * Returns the (possibly moved) array.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       /* already allocated: double the capacity */
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*)* (*len) );
+   }
+
+   /* first allocation */
+   *len=8;
+   return palloc( sizeof(WordEntry*)* (*len) );
+}
+
+/*
+ * compareStatWord: order a statistics entry against a tsvector word.
+ * Shorter words sort first; words of equal length are ordered by strncmp()
+ * over their bytes.  Returns <0, 0 or >0.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(
+       STATSTRPTR(stat) + a->pos,
+       STRPTR(txt) + b->pos,
+       a->len
+   );
+}
+
+/*
+ * formstat: build a new tsstat holding every entry of `stat` plus `len` new
+ * words taken from `txt`.
+ *
+ * stat  - existing statistics; only read here.
+ * txt   - tsvector the new words point into.
+ * entry - array of `len` pointers to WordEntry items of `txt`.  The merge
+ *         below walks `entry` and the StatEntry array of `stat` in step, so
+ *         both must already be in compareStatWord() order.
+ * len   - number of elements in `entry`.
+ *
+ * Returns a freshly palloc'ed tsstat; `stat` is neither modified nor freed.
+ * Each new word starts with ndoc=1 and nentry equal to POSDATALEN() of the
+ * word (or 1 when that is 0).
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* total number of string bytes contributed by the new words */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings keep their offsets; new strings are appended after them */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary-search its insertion point */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       /* entries before the insertion point, the new entry, then the rest */
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries (possibly nothing) */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* whatever is left of the new words; reached only once the old entries ran out */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+/*
+ * ts_accum: fold one tsvector (argument 1) into a tsstat accumulator
+ * (argument 0).
+ *
+ * Words already present in the accumulator get ndoc incremented and nentry
+ * increased by their number of positions (1 if the word has none); these
+ * updates are made in place.  Words not yet present are collected and, if
+ * there are any, merged into a new tsstat built by formstat().
+ *
+ * Returns the accumulator itself when no new word was found, otherwise the
+ * newly built tsstat (the old accumulator is not freed here).
+ * A NULL accumulator is treated as an empty one; a NULL or empty tsvector
+ * leaves the accumulator unchanged.
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt;
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /*
+    * A NULL document must be detected before detoasting: detoasting
+    * dereferences the datum.
+    */
+   if ( PG_ARGISNULL(1) || PG_GETARG_POINTER(1)==NULL )
+       PG_RETURN_POINTER(stat);
+
+   txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+
+   /* simple check of correctness */
+   if ( txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Linear merge unless the accumulator is at least 100 times larger than
+    * the document; in that case binary-search each word instead.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word sorts before the current entry: it is new */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* accumulator exhausted: every remaining word is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* the break above leaves StopLow < StopHigh, so this means a miss */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   /* formstat() copies the new words, so txt may be released afterwards */
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions in this file: a private
+ * copy of the statistics being returned and the index of the next entry to
+ * emit.
+ */
+typedef struct {
+   uint32  cur;
+   /* holds a copy of a tsstat, so it is typed as one */
+   tsstat *stat;
+} StatStorage;
+
+/*
+ * ts_setup_firstcall: first-call initialisation shared by the set-returning
+ * functions of this file.
+ *
+ * Inside the multi-call memory context it copies `stat` into a new
+ * StatStorage (cursor at 0), stores that in funcctx->user_fctx, and sets up
+ * funcctx->slot and funcctx->attinmeta from the tuple descriptor of the
+ * relation/type named "statinfo".  The caller's memory context is restored
+ * before returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext   caller_ctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   StatStorage     *storage = palloc( sizeof(StatStorage) );
+   TupleDesc       desc;
+
+   /* private copy of the statistics; the cursor starts at the first entry */
+   storage->cur = 0;
+   storage->stat = palloc( stat->len );
+   memcpy(storage->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)storage;
+
+   desc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(caller_ctx);
+}
+
+
+/*
+ * ts_process_call: produce the next result row of a statistics scan.
+ *
+ * Reads the StatStorage kept in funcctx->user_fctx.  While entries remain it
+ * builds a three-column tuple (word, ndoc, nentry) from the current entry,
+ * advances the cursor and returns the tuple as a Datum.  When the entries
+ * are exhausted it frees the storage and returns (Datum)0.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *storage = (StatStorage*)funcctx->user_fctx;
+   StatEntry   *entry;
+   HeapTuple   tuple;
+   Datum       result;
+   char        *values[3];
+   char        ndoc[16];
+   char        nentry[16];
+
+   if ( !( storage->cur < storage->stat->size ) ) {
+       /* nothing left: release the per-query state */
+       pfree(storage->stat);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   entry = STATPTR(storage->stat) + storage->cur;
+
+   /* counters are passed to the tuple builder as decimal strings */
+   sprintf(ndoc,"%d",entry->ndoc);
+   sprintf(nentry,"%d",entry->nentry);
+
+   /* stored words carry no terminator; make a NUL-terminated copy */
+   values[0] = palloc( entry->len+1 );
+   memcpy( values[0], STATSTRPTR(storage->stat)+entry->pos, entry->len);
+   values[0][entry->len] = '\0';
+   values[1] = ndoc;
+   values[2] = nentry;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[0]);
+   storage->cur++;
+   return result;
+}
+
+/*
+ * ts_accum_finish: set-returning function that emits the entries of the
+ * tsstat passed as argument 0, one row per call.
+ */
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       /* first call: copy the tsstat into the multi-call state */
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* ts_process_call() returns (Datum)0 once every entry has been emitted */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Oid of the tsvector type; filled in by get_ti_Oid() */
+static Oid tiOid=InvalidOid;
+
+/*
+ * get_ti_Oid: look up the Oid of the type named 'tsvector' in pg_type via
+ * SPI and store it in tiOid.  Raises an ERROR if the query fails, returns
+ * no row, or yields a NULL/invalid Oid.
+ * The function issues SPI_exec() itself but does not connect to SPI.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * An empty result must stop here, before vals[0] is read.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull || tiOid==InvalidOid ) {
+       tiOid = InvalidOid;
+       elog(ERROR, "tsvector type has InvalidOid");
+   }
+}
+
+/*
+ * ts_stat_sql: run the SQL text `txt` and accumulate word statistics over
+ * the tsvector values it returns.
+ *
+ * The query must produce exactly one column of type tsvector, otherwise an
+ * ERROR is raised.  Rows are fetched through an SPI cursor, 100 at a time;
+ * NULL values are skipped and every other value is folded into the
+ * accumulator with ts_accum().
+ *
+ * Returns the accumulated tsstat (empty if the query returns no rows).
+ * The function uses SPI calls but does not connect to SPI itself.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   /* one outer iteration per fetched batch; an empty batch ends the scan */
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum() may return its input or a new object */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+/*
+ * ts_stat: set-returning function taking the text of an SQL query
+ * (argument 0).  On the first call the query is executed through SPI and
+ * its tsvector column is accumulated into a tsstat by ts_stat_sql(); each
+ * call then returns one row produced by ts_process_call().
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* NOTE(review): the result of SPI_connect() is not checked */
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       /* copies stat into the multi-call context before SPI is closed */
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* ts_process_call() returns (Datum)0 once every entry has been emitted */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one word.
+ * len / pos locate the word's bytes inside the string area of the owning
+ * tsstat (pos is an offset from STATSTRPTR()); ndoc and nentry are counters.
+ * NOTE(review): ndoc presumably counts documents containing the word and
+ * nentry its total occurrences -- confirm against ts_accum().
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Accumulated statistics: a two-int4 header (len, size) followed in data[]
+ * by `size` StatEntry items and then the word strings, as laid out by the
+ * STATPTR / STATSTRPTR macros.  `len` is the total size in bytes.
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat.  Every macro argument is parenthesized so that
+ * arbitrary expressions can be passed, and the casts name tsstat itself, so
+ * the header does not depend on tsvector being declared.
+ *
+ * STATHDRSIZE        - size of the (len, size) header
+ * CALCSTATSIZE(x,l)  - total bytes for x entries plus l bytes of strings
+ * STATPTR(x)         - first StatEntry of tsstat x
+ * STATSTRPTR(x)      - start of the string area of tsstat x
+ * STATSTRSIZE(x)     - number of bytes in the string area of tsstat x
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+-- One row per dictionary, keyed by dict_name.
+-- dict_init and dict_lexize are filled by the inserts below with the
+-- pg_proc oids of the dictionary's init and lexize functions (looked up by
+-- function name).  dict_initoption is free text; the stemmer rows below
+-- store a stop-word file path in it.
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+-- One row per parser, keyed by prs_name.  The five oid columns are filled
+-- by the insert for the 'default' parser below with pg_proc oids of its
+-- start, nexttoken, end, headline and lextype functions.
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+-- One row per configuration: its name, the name of the parser it uses and
+-- an optional locale (the 'simple' row below is inserted without one).
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+-- For each (configuration, token alias) pair, the array of dictionary names
+-- assigned to that token type; populated by the COPY block below.
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * a string of lexeme values, plus an array giving each lexeme's position in that string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>     /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator for WordEntryPos elements: ascending order of pos.
+ * The weight field does not take part in the comparison.
+ *
+ * Equal positions compare as 0.  Reporting them as "greater" in both
+ * directions would make the ordering inconsistent, which qsort() does
+ * not permit.
+ */
+static int
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] by pos and remove duplicates in place.
+ *
+ * When the same pos occurs more than once, a single element is kept and
+ * it receives the largest weight seen for that pos.  Compaction stops
+ * early once the output has reached index MAXNUMPOS-1 or a position equal
+ * to MAXENTRYPOS-1 has been stored.
+ *
+ * Returns the number of elements left at the front of a[].  l is expected
+ * to be non-negative; an empty array yields 0.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *ptr, *res;
+
+   /* zero or one element is already sorted and unique */
+   if (l <= 1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   res = a;        /* last element kept so far */
+   ptr = a + 1;    /* next candidate */
+   while (ptr - a < l) {
+       if ( ptr->pos != res->pos ) {
+           res++;
+           res->pos = ptr->pos;
+           res->weight = ptr->weight;
+           if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
+               break;
+       } else if ( ptr->weight > res->weight )
+           res->weight = ptr->weight;
+       ptr++;
+   }
+   return res + 1 - a;
+}
+
+/*
+ * Lexeme text consulted by compareentry().  qsort() cannot pass context
+ * to its comparator, so uniqueentry() stores the buffer here before sorting.
+ */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN elements: shorter lexemes sort first;
+ * lexemes of equal length are ordered by strncmp() over their bytes in
+ * BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *ea = (const WordEntryIN *) a;
+   const WordEntryIN *eb = (const WordEntryIN *) b;
+
+   if ( ea->entry.len != eb->entry.len )
+       return ( ea->entry.len > eb->entry.len ) ? 1 : -1;
+
+   return strncmp(
+                  &BufferStr[ea->entry.pos],
+                  &BufferStr[eb->entry.pos],
+                  ea->entry.len);
+}
+
+/*
+ * Sort the l parsed entries in a[] and merge entries whose lexemes are
+ * identical, compacting the survivors to the front of a[].
+ *
+ * a          entries; entry.pos/entry.len address the lexeme inside buf.
+ *            When entry.haspos is set, pos points to a palloc'd array
+ *            whose element 0 is used as a uint16 counter, followed by
+ *            that many positions.
+ * buf        buffer holding the lexeme bytes.
+ * outbuflen  output only: number of data bytes needed to store the result,
+ *            i.e. SHORTALIGN(len) per lexeme plus, for each lexeme that
+ *            has positions, (count + 1) WordEntryPos-sized slots (the
+ *            counter slot and the positions).  This matches what
+ *            tsvector_in() copies for each entry.
+ *
+ * Position lists of merged entries are concatenated and then sorted and
+ * de-duplicated by uniquePos().
+ *
+ * Returns the number of distinct entries.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   /* the size is computed from scratch, not added to the caller's value */
+   *outbuflen = 0;
+   if (l <= 0)
+       return 0;
+
+   res = a;
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen += (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
+       }
+       *outbuflen += SHORTALIGN(res->entry.len);
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* new lexeme: finish the current one and account for its size */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           /* source and destination of memcpy() must not be the same object */
+           if ( res != ptr )
+               memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* same lexeme again: fold its positions into res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]),
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* res had no positions: take over ptr's array */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* finish the last lexeme */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/*
+ * States of the gettoken_tsvector() scanner (stored in state->state):
+ *   WAITWORD      skipping blanks before a word
+ *   WAITENDWORD   inside an unquoted word
+ *   WAITNEXTCHAR  the previous character was a backslash
+ *   WAITENDCMPLX  inside a quoted ('...') word
+ *   WAITPOSINFO   just after the closing quote; ':' may follow
+ *   INPOSINFO     expecting the first digit of a position
+ *   WAITPOSDELIM  inside or after a position: digits, weight letter, ',' or end
+ */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Double the state->word buffer when fewer than two bytes are free past
+ * state->curpos, and re-point state->curpos into the (possibly moved)
+ * buffer.  Expects a variable named "state" of type TI_IN_STATE * in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Scan the next word (and, optionally, its position list) from
+ * state->prsbuf.
+ *
+ * On success returns 1: the word is in state->word, NUL-terminated, with
+ * state->curpos pointing at the terminator.  If a position list was read,
+ * state->alen is non-zero and state->pos points to a palloc'd array whose
+ * element 0 is used as a uint16 counter followed by the positions.
+ * Returns 0 when the end of the input is reached while looking for a word.
+ * Malformed input is reported with elog(ERROR).
+ *
+ * When state->oprisdelim is set, characters accepted by ISOPERATOR() end a
+ * word, and a ':' or a closing quote ends the token without any position
+ * list being parsed.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;   /* state to resume after an escaped character */
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           /* skip spaces until a word, a quote or a backslash starts a token */
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           /* character after a backslash is copied literally */
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           /* unquoted word: ends at space, end of input, operator or ':' */
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           /* quoted word: runs up to the closing quote */
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* step past the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           /* after a quoted word: ':' introduces positions, anything else ends the token */
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           /* first digit of a position: append a new slot to state->pos */
+           if ( isdigit(*(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi() reads the whole number here; its remaining digits are skipped in WAITPOSDELIM */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           /*
+            * After the first digit of a position: ',' starts the next
+            * position; a weight letter sets the weight (a or * = 3, b = 2,
+            * c = 1, d = 0) and is rejected if a non-zero weight is already
+            * set; whitespace or end of input ends the token; further
+            * digits are skipped.
+            */
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower(*(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower(*(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower(*(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower(*(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace(*(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit(*(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       /* advance to the next input character */
+       state->prsbuf++;
+   }
+
+   /* not reached: the loop above only exits via return or elog(ERROR) */
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type.
+ *
+ * Argument 0 is the textual representation (cstring).  Tokens are read
+ * with gettoken_tsvector(), their lexemes are collected in a scratch
+ * buffer, duplicates are merged by uniqueentry(), and the result is laid
+ * out as: header, WordEntry array, then for every entry its lexeme
+ * (padded with SHORTALIGN) followed by its position data, if any.
+ *
+ * Raises ERROR for a lexeme of MAXSTRLEN bytes or more, or when an offset
+ * would not fit into the 20-bit WordEntry.pos field.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       /* grow the scratch buffer until the new lexeme fits */
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /* entry.pos is a 20-bit field: the largest offset it holds is MAXSTRPOS-1 */
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* no lexemes: the value has no data area */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /* final offsets include position data, so they need their own range check */
+       if ( cur - STRPTR(in) >= MAXSTRPOS )
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* counter slot plus the positions themselves */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos );
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * Return the number of entries (the size field) of the tsvector passed as
+ * argument 0.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type.
+ *
+ * Each entry is printed as a single-quoted lexeme (a quote inside the
+ * lexeme is preceded by a backslash); entries are separated by one space.
+ * An entry with positions is followed by ':' and a comma-separated list of
+ * positions, each optionally suffixed with A, B or C for weights 3, 2, 1.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   WordEntry  *entries = ARRPTR(vec);
+   WordEntry  *ent;
+   char       *result,
+              *dst,
+              *src;
+   int4        bufsize,
+               idx,
+               nleft,
+               npos;
+
+   /* two quotes per entry, separating spaces, terminator */
+   bufsize = vec->size * 2 /* '' */ + vec->size - 1 /* space */ + 2 /*\0*/;
+   for (idx = 0; idx < vec->size; idx++) {
+       bufsize += entries[idx].len*2 /*for escape */;
+       if ( entries[idx].haspos )
+           bufsize += 7*POSDATALEN(vec, &(entries[idx]));
+   }
+
+   result = (char *) palloc(bufsize);
+   dst = result;
+   for (idx = 0, ent = entries; idx < vec->size; idx++, ent++)
+   {
+       src = STRPTR(vec)+ent->pos;
+       if (idx > 0)
+           *dst++ = ' ';
+       *dst++ = '\'';
+       for (nleft = ent->len; nleft > 0; nleft--)
+       {
+           if (*src == '\'')
+           {
+               /* one more byte for the backslash; the buffer may move */
+               int4        used = dst - result;
+
+               bufsize++;
+               result = (char *) repalloc((void *) result, bufsize);
+               dst = result + used;
+               *dst++ = '\\';
+           }
+           *dst++ = *src++;
+       }
+       *dst++ = '\'';
+
+       npos = POSDATALEN(vec,ent);
+       if ( npos != 0 ) {
+           WordEntryPos *wep = POSDATAPTR(vec,ent);
+
+           *dst++ = ':';
+           for (; npos > 0; npos--, wep++) {
+               sprintf(dst,"%d",wep->pos);
+               dst=strchr(dst,'\0');
+               if ( wep->weight == 3 )
+                   *dst++ = 'A';
+               else if ( wep->weight == 2 )
+                   *dst++ = 'B';
+               else if ( wep->weight == 1 )
+                   *dst++ = 'C';
+               /* weight 0 is printed without a letter */
+               if ( npos > 1 )
+                   *dst++ = ',';
+           }
+       }
+   }
+   *dst='\0';
+   result[bufsize - 1] = '\0';
+   PG_FREE_IF_COPY(vec, 0);
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * qsort() comparator for WORD elements: shorter words first, then by
+ * strncmp() of the word bytes, then by ascending pos.pos.
+ *
+ * Two elements with the same word and the same position compare as 0, so
+ * the result is consistent whichever way round qsort() passes them.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int         res;
+
+   if ( wa->len != wb->len )
+       return ( wa->len > wb->len ) ? 1 : -1;
+
+   res = strncmp( wa->word, wb->word, wb->len );
+   if ( res != 0 )
+       return res;
+
+   if ( wa->pos.pos == wb->pos.pos )
+       return 0;
+   return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l words in a[] and merge identical words, compacting the
+ * survivors to the front of a[].
+ *
+ * On entry each element carries a single position in pos.pos.  On exit
+ * each surviving element has pos.apos pointing to a palloc'd uint16 array:
+ * apos[0] is the number of positions, apos[1..] are the positions (clamped
+ * with LIMITPOS) and alen is the allocated length of that array.  The
+ * word buffers of merged duplicates are pfree'd.
+ *
+ * A position is appended only while fewer than MAXNUMPOS-1 are stored, the
+ * last stored one is not MAXENTRYPOS-1, and it differs from the last
+ * stored one, so a lexeme never lists the same position twice.
+ *
+ * Returns the number of distinct words.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   if (l == 1) {
+       /* read pos.pos before pos.apos is assigned */
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* new word: start a fresh position list for it */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* same word again: keep only its position */
+           pfree(ptr->word);
+           tmppos=LIMITPOS(ptr->pos.pos);
+           if ( res->pos.apos[0] < MAXNUMPOS-1 &&
+                res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 &&
+                res->pos.apos[ res->pos.apos[0] ] != tmppos ) {
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = tmppos;
+               res->pos.apos[0]++;
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are first sorted and merged by uniqueWORD().  The result is
+ * laid out as: header, WordEntry array, then for every entry its lexeme
+ * (padded with SHORTALIGN) followed by a uint16 position count and the
+ * positions, all with weight 0.
+ *
+ * Frees every word buffer, every position array and prs->words itself.
+ * Raises ERROR when a lexeme offset would not fit into the 20-bit
+ * WordEntry.pos field.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+   /* first pass: size of the data area */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   /* second pass: fill the entry array and the data area */
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /* pos is a 20-bit field: the largest offset it holds is MAXSTRPOS-1 */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+
+           ptr->haspos=1;
+           /* the count must be stored before POSDATAPTR/the loop read it */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text): parse the text (argument 1) with the
+ * configuration looked up from argument 0 and return the resulting
+ * tsvector.  Text that yields no words gives an empty tsvector.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *txt = PG_GETARG_TEXT_P(1);
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     prs;
+   tsvector   *result;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+   PG_FREE_IF_COPY(txt, 1);
+
+   if (prs.curwords == 0) {
+       /* nothing parsed: hand back a header-only value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   } else
+       result = makevalue(&prs);
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg_name, text): translate the configuration name with
+ * name2id_cfg() and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * to_tsvector(text): delegate to to_tsvector() using the configuration id
+ * returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * TEXTOID.  Returns the oid of the first such candidate, or InvalidOid if
+ * there is none.  The candidate list is freed along the way.
+ */
+static Oid
+findFunc(char *fname) {
+   List       *names = makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(names, 1);
+   FuncCandidateList next;
+   Oid         found = InvalidOid;
+
+   freeList(names);
+
+   for (; cand != NULL; cand = next) {
+       next = cand->next;
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+   }
+
+   return found;
+}
+
+/*
+ * Trigger function: tsearch2(tsvector_field, text_field1, ...)
+ *
+ * Must be fired BEFORE INSERT or UPDATE, per row.  The first trigger
+ * argument names the column that receives the tsvector.  Each following
+ * argument is either the name of a text/varchar/char column whose value
+ * is parsed, or, if no such column exists, the name of a one-argument
+ * function on text (see findFunc).  Once a function name has been seen,
+ * the value of every later column is passed through that function before
+ * being parsed.
+ *
+ * Returns the modified tuple; raises ERROR on misuse or when
+ * SPI_modifytuple() fails.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* first argument: the tsvector column to fill */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user-supplied function */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /* result was not toasted: make the pfree test below skip it */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free the detoasted copy, if detoasting made one */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store an empty (header-only) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One lexeme of a tsvector, packed into 32 bits:
+ *   haspos  set when position data follows the lexeme in the data area
+ *   len     lexeme length in bytes (11 bits)
+ *   pos     offset of the lexeme from the start of the data area (20 bits)
+ * MAXSTRLEN and MAXSTRPOS are one past the largest value len and pos can hold.
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme, packed into 16 bits: a 2-bit weight and a
+ * 14-bit position.
+ *   MAXENTRYPOS  one past the largest position the pos field can hold
+ *   MAXNUMPOS    limit applied to the number of positions kept per lexeme
+ *   LIMITPOS(x)  clamps x to MAXENTRYPOS-1
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * The tsvector value.
+ *   len   total size of the value in bytes
+ *   size  number of WordEntry elements
+ *   data  start of the variable part: the WordEntry array followed by the
+ *         lexemes and their position data
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Layout helpers for tsvector values.  x is a pointer to a tsvector, e a
+ * pointer to one of its WordEntry elements.  Every macro argument is
+ * parenthesized so that expressions (e.g. "cur - data", "p + 1") can be
+ * passed safely.
+ *
+ *   DATAHDRSIZE          size of the two int4 header fields
+ *   CALCDATASIZE(x,len)  total size for x entries and len data bytes
+ *   ARRPTR(x)            start of the WordEntry array
+ *   STRPTR(x)            start of the data area (just past the array)
+ *   STRSIZE(x)           size of the data area
+ *   _POSDATAPTR(x,e)     address of e's uint16 position counter, which sits
+ *                        right after the SHORTALIGN-padded lexeme
+ *   POSDATALEN(x,e)      number of positions of e, 0 if e has none
+ *   POSDATAPTR(x,e)      first WordEntryPos of e (just past the counter)
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Working form of an entry while a tsvector is being read from text:
+ * the entry itself plus a separately allocated position array whose
+ * element 0 is used as a uint16 counter.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * Scanner state for gettoken_tsvector().
+ *   prsbuf      current read position in the input string
+ *   word        buffer receiving the current word
+ *   curpos      write position inside word
+ *   len         allocated size of word
+ *   state       current scanner state
+ *   alen        allocated length of pos; 0 if the token has no positions
+ *   pos         position array of the token (element 0 holds the count)
+ *   oprisdelim  when true, operator characters and ':' end a word
+ */
+typedef struct
+{
+   char       *prsbuf;
+   char       *word;
+   char       *curpos;
+   int4        len;
+   int4        state;
+   int4        alen;
+   WordEntryPos    *pos;
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+/* Reads the next token; returns 1 if one was read, 0 at end of input. */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>     /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the argument that contains the same
+ * lexemes but no position data.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* data area: the SHORTALIGN-padded lexemes only */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char"): return a copy of the tsvector in which
+ * every position carries the weight named by the second argument
+ * (a = 3, b = 2, c = 1, d = 0; letter case is ignored).  Any other letter
+ * raises ERROR.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *src = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char        letter = PG_GETARG_CHAR(1);
+   tsvector   *dst;
+   WordEntry  *ent;
+   int         weight = 0,
+               lc,
+               left,
+               npos,
+               k;
+
+   lc = tolower(letter);
+   if ( lc == 'a' )
+       weight = 3;
+   else if ( lc == 'b' )
+       weight = 2;
+   else if ( lc == 'c' )
+       weight = 1;
+   else if ( lc == 'd' )
+       weight = 0;
+   else
+       elog(ERROR,"Unknown weight");
+
+   /* work on a private copy of the whole value */
+   dst = (tsvector*)palloc(src->len);
+   memcpy(dst,src,src->len);
+
+   ent = ARRPTR(dst);
+   for (left = dst->size; left != 0; left--, ent++) {
+       npos = POSDATALEN(dst,ent);
+       if ( npos != 0 ) {
+           WordEntryPos *wep = POSDATAPTR(dst,ent);
+
+           for (k = 0; k < npos; k++)
+               wep[k].weight = weight;
+       }
+   }
+
+   PG_FREE_IF_COPY(src, 0);
+   PG_RETURN_POINTER(dst);
+}
+
+/*
+ * Compare two entries that may live in different tsvectors; ptra and ptrb
+ * are the data areas a->pos and b->pos refer to.  Shorter lexemes sort
+ * first; lexemes of equal length are ordered by strncmp().
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp( ptra + a->pos, ptrb + b->pos, a->len );
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position list
+ * of destptr (an entry of dest), adding maxpos to each position and
+ * clamping the result with LIMITPOS.  Weights are copied unchanged.
+ *
+ * If destptr has no positions yet, its counter is initialized to 0 first.
+ * Copying stops when the source is exhausted, when the destination holds
+ * MAXNUMPOS positions, or when the last stored position is MAXENTRYPOS-1.
+ * destptr->haspos is set when at least one position was appended.
+ *
+ * Returns the number of positions appended.
+ *
+ * NOTE(review): the loop condition was truncated in this copy of the
+ * source; the limits below follow the ones used by uniquePos() and
+ * uniqueWORD() -- confirm against the upstream file.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos )
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight;
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1;
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector): merge two tsvectors into one.
+ *
+ * Both inputs are walked in compareEntry() order.  Entries of the first
+ * argument keep their positions; positions of the second argument are
+ * shifted by the largest position found in the first (see add_pos).  A
+ * lexeme present in both inputs yields one entry holding both position
+ * lists.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /*
+    * Allocate for the case where no lexeme is shared.  size is set to the
+    * sum of both sizes for now, so STRPTR(out) points past the largest
+    * entry array that may be needed; it is corrected after the merge.
+    */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* counter and positions are copied verbatim */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* positions of in2 are shifted by maxpos */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one entry, both position lists */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the counter is already in place, so only positions are added */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* remaining entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* remaining entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Fix up the real entry count and total length; if shared lexemes
+    * shrank the entry array, slide the data area down to follow it.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- WARNING: uninstall script for tsearch2.
+-- It drops all indexes, triggers and columns that use the types
+-- defined in tsearch2.sql.
+
+
+-- operator class
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregate
+DROP AGGREGATE stat(tsvector);
+
+-- tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types; CASCADE also removes the objects that depend on them
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- functions that are dropped explicitly
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of each token type, indexed by the token
+ * id defined in deflex.h (1..LASTNUM).  Entry 0 is an empty placeholder
+ * because token ids start at 1.  Must be kept in the same order as the
+ * ids in deflex.h.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of each token type, indexed by the token id defined in
+ * deflex.h (1..LASTNUM).  Entry 0 is an empty placeholder because token
+ * ids start at 1.  Must be kept in the same order as lex_descr[].
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Token type ids returned by the default word parser.
+ *
+ * Remember: LASTNUM must equal the highest token id below, and the
+ * lex_descr[] / tok_alias[] arrays need one entry per id (plus the
+ * unused entry 0).
+ */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* per-token-id description and alias tables */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Interface to the flex-generated word scanner.
+ *
+ * NOTE(review): token and tokenlen are declared without "extern", so
+ * every file including this header gets its own tentative definition.
+ * That only links when the toolchain merges common symbols; consider
+ * "extern" here plus one definition in the scanner source.
+ *
+ * NOTE(review): FILE is used below but <stdio.h> is not included here;
+ * includers must provide it before including this header.
+ */
+
+/* text and length of the most recently scanned token */
+char      *token;
+int            tokenlen;
+/* scan the next token; returns its token type id */
+int            tsearch2_yylex(void);
+/* begin scanning a memory buffer / a file handle */
+void       start_parse_str(char *, int);
+void       start_parse_fh(FILE *, int);
+/* release scanner state set up by start_parse_*() */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/* Avoid exit() on fatal scanner errors */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* postgres allocation function */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the text of the current token */
+char *s     = NULL;  /* private copy of a whole matched composite token
+                      * (hyphenated word or URL), released in end_parse() */
+
+YY_BUFFER_STATE buf = NULL; /* current scan buffer; needed to parse from a string */
+
+int lrlimit = -1;  /* bytes still allowed to be read from a file handle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next fread() request issued by YY_INPUT */
+
+/*
+ * Redefine flex's input macro so that reads from a file handle can be
+ * limited to lrlimit bytes.
+ *
+ * Interactive buffers are read character by character up to and
+ * including a newline.  Otherwise: lrlimit == 0 reports end of input
+ * (YY_NULL); lrlimit > 0 caps the request at lrlimit and decrements it;
+ * a negative lrlimit requests max_size bytes.
+ *
+ * Note: the fread() "if" is NOT part of the "else bytestoread = max_size"
+ * branch despite its indentation; it runs in both the limited and the
+ * unlimited case.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: release the saved composite-token copy and
+ * delete the current scan buffer.
+ */
+void end_parse() {
+   if (s != NULL) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Start parsing the in-memory buffer str of length limit.  Any parse
+ * still in progress is ended first.
+ */
+void start_parse_str(char* str, int limit) {
+   if (buf != NULL)
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Start parsing from the file handle fh.  A non-zero limit becomes the
+ * read limit (lrlimit); limit == 0 means unlimited read.  Any parse
+ * still in progress is ended first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if (buf != NULL)
+       end_parse();
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the parser registered in pg_ts_parser under oid id.
+ *
+ * The lookup plan is prepared once and saved in plan_getparser.  The
+ * start/nexttoken/end/headline functions are resolved into FmgrInfo
+ * structs allocated in TopMemoryContext; the lextype function is kept
+ * as a plain oid.  Raises an error if no such parser row exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid     argtypes[1];
+   Datum   values[1];
+   bool    isnull;
+   int     rc;
+
+   argtypes[0] = OIDOID;
+   values[0] = ObjectIdGetDatum(id);
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( plan_getparser == NULL ) {
+       void *plan = SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, argtypes );
+
+       plan_getparser = SPI_saveplan( plan );
+       if ( plan_getparser == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_getparser, values, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed > 0 ) {
+       HeapTuple   row = SPI_tuptable->vals[0];
+       TupleDesc   rowdesc = SPI_tuptable->tupdesc;
+       Oid         funcoid;
+
+       /* column 1: prs_start */
+       funcoid = DatumGetObjectId( SPI_getbinval(row, rowdesc, 1, &isnull) );
+       fmgr_info_cxt(funcoid, &(prs->start_info), TopMemoryContext);
+
+       /* column 2: prs_nexttoken */
+       funcoid = DatumGetObjectId( SPI_getbinval(row, rowdesc, 2, &isnull) );
+       fmgr_info_cxt(funcoid, &(prs->getlexeme_info), TopMemoryContext);
+
+       /* column 3: prs_end */
+       funcoid = DatumGetObjectId( SPI_getbinval(row, rowdesc, 3, &isnull) );
+       fmgr_info_cxt(funcoid, &(prs->end_info), TopMemoryContext);
+
+       /* column 4: prs_lextype, stored as an oid only */
+       prs->lextype = DatumGetObjectId( SPI_getbinval(row, rowdesc, 4, &isnull) );
+
+       /* column 5: prs_headline */
+       funcoid = DatumGetObjectId( SPI_getbinval(row, rowdesc, 5, &isnull) );
+       fmgr_info_cxt(funcoid, &(prs->headline_info), TopMemoryContext);
+
+       prs->prs_id = id;
+   } else
+       ts_error(ERROR, "No parser with id %d", id);
+
+   SPI_finish();
+}
+
+/* Per-backend cache of parsers already loaded by findprs(). */
+typedef struct {
+   WParserInfo *last_prs; /* entry returned most recently; checked first by findprs() */
+   int     len; /* number of entries in use in list */
+   int     reallen; /* number of entries allocated for list */
+   WParserInfo *list; /* entries, kept sorted by prs_id for bsearch() */
+   SNMap       name2id_map; /* parser name -> oid cache used by name2id_prs() */
+} PrsList;
+
+/* the cache itself; reset_prs() returns it to this all-zero state */
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget every cached parser: release the name->id map and the parser
+ * array, then return PList to its all-zero state.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL )
+       free(PList.list);
+
+   memset(&PList, 0, sizeof(PrsList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is
+ * unsigned, so the difference converted to int has the wrong sign when
+ * two ids are more than INT_MAX apart.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached WParserInfo for parser id, loading it with
+ * init_prs() on first use.
+ *
+ * The result points into PList.list.  A later findprs() call that adds
+ * a parser may realloc and re-sort that array, so callers should not
+ * keep the pointer across such calls.
+ *
+ * A new parser is loaded into a local struct and only copied into the
+ * cache once init_prs() has succeeded.  Otherwise an error raised by
+ * init_prs() would leave PList.last_prs pointing at a zeroed,
+ * uncommitted slot (prs_id 0), which a later findprs(0) would return.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo newprs;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       WParserInfo key;
+       key.prs_id=id;
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* last chance: load it; do not expose anything until it is complete */
+   PList.last_prs = NULL;
+   init_prs(id, &newprs);
+
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /* plain struct copy is fine: qsort() below relocates entries too */
+   PList.list[PList.len] = newprs;
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return findprs(id); /* qsort changed order!! */
+}
+
+/* saved SPI plan for the name -> oid lookup in name2id_prs() */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into its pg_ts_parser oid.
+ *
+ * The name->id map in PList is consulted first; on a miss the oid is
+ * fetched through SPI and added to the map.  Raises an error if no
+ * parser with that name exists.
+ */
+Oid
+name2id_prs(text *name) {
+   Oid     argtypes[1];
+   Datum   values[1];
+   bool    isnull;
+   int     rc;
+   Oid     id;
+
+   argtypes[0] = TEXTOID;
+   values[0] = PointerGetDatum(name);
+
+   id = findSNMap_t( &(PList.name2id_map), name );
+   if ( id != 0 )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       void *plan = SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes );
+
+       plan_name2id = SPI_saveplan( plan );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed > 0 ) {
+       Datum d = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull);
+
+       id = DatumGetObjectId( d );
+   } else
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+
+   SPI_finish();
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* Cross-call state of the token_type*() set-returning functions. */
+typedef struct {
+   int     cur; /* index of the next list entry to return */
+   LexDescr    *list; /* array from the parser's lextype function; ends at lexid == 0 */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type*() functions: fetch the
+ * token type list from the parser's lextype function and prepare the
+ * "tokentype" tuple machinery.  State is allocated in the multi-call
+ * memory context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo     *parser;
+   MemoryContext   saved;
+   TypeStorage     *state;
+   Datum           types;
+   TupleDesc       desc;
+
+   parser = findprs(prsid);
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   state->cur = 0;
+   types = OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) );
+   state->list = (LexDescr*)DatumGetPointer( types );
+   funcctx->user_fctx = (void*)state;
+
+   desc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+/*
+ * Per-call worker of the token_type*() functions.  Returns the next
+ * (id, alias, description) tuple, or (Datum)0 once the list is
+ * exhausted, in which case the stored state is released.  The alias
+ * and description strings of an entry are freed after it is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *state = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char        *fields[3];
+   char        idbuf[16];
+   HeapTuple   tup;
+   Datum       res;
+
+   /* end of list (or no list at all): clean up and signal completion */
+   if ( !state->list || !state->list[state->cur].lexid ) {
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   fields[0] = idbuf;
+   sprintf(idbuf, "%d", entry->lexid);
+   fields[1] = entry->alias;
+   fields[2] = entry->descr;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   res = TupleGetDatum(funcctx->slot, tup);
+
+   pfree(fields[1]);
+   pfree(fields[2]);
+   state->cur++;
+   return res;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: token types of the parser whose oid is
+ * argument 0.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) { 
+       ctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(ctx, PG_GETARG_OID(0) );
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+
+   row = process_call(ctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: token types of the parser whose name is
+ * argument 0.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0); 
+       Oid prsid;
+
+       ctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( prsname );
+       setup_firstcall(ctx, prsid );
+       PG_FREE_IF_COPY(prsname,0);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+
+   row = process_call(ctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: token types of the current parser.  If no
+ * current parser has been set, the one named "default" becomes current.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       ctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) {
+           text *defname = char2text("default");
+
+           current_parser_id = name2id_prs( defname );
+       }
+       setup_firstcall(ctx, current_parser_id );
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+
+   row = process_call(ctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+/*
+ * Make the parser with the given oid the current one.  findprs() is
+ * called first, so the parser is looked up before the id is stored.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        Oid prsid;
+
+        findprs(PG_GETARG_OID(0));
+        prsid = PG_GETARG_OID(0);
+        current_parser_id = prsid;
+        PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+/*
+ * Make the parser with the given name the current one; resolves the
+ * name to an oid and delegates to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *prsname = PG_GETARG_TEXT_P(0);
+        Datum prsid;
+
+        prsid = ObjectIdGetDatum( name2id_prs(prsname) );
+        DirectFunctionCall1( set_curprs, prsid );
+
+        PG_FREE_IF_COPY(prsname, 0);
+        PG_RETURN_VOID();
+}
+
+/* One token produced by a parser run. */
+typedef struct {
+   int type; /* token type id returned by the parser's getlexeme function */
+   char    *lexem; /* palloc'd, NUL-terminated copy of the token text */
+} LexemEntry;
+
+/* Cross-call state of the parse*() set-returning functions. */
+typedef struct {
+   int cur; /* index of the next entry to return */
+   int len; /* allocated size while collecting; number of tokens afterwards */
+   LexemEntry  *list; /* collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse*() functions.
+ *
+ * Runs the complete parse of txt up front: calls the parser's start
+ * function, collects (type, lexeme) pairs until its getlexeme function
+ * returns 0, then calls its end function.  Each lexeme is copied into a
+ * palloc'd NUL-terminated string, so txt may be freed by the caller
+ * afterwards.  All state lives in the multi-call memory context.
+ *
+ * prsid is an Oid (not int): every caller passes an oid and findprs()
+ * expects one.
+ *
+ * NOTE(review): the parser state is stored in the shared cache entry
+ * (prs->prs), so this looks non-reentrant for the same parser - confirm.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the payload of txt (varlena header skipped) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* a token type of 0 marks the end of input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* grow the array geometrically when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* lex is not NUL-terminated: copy llen bytes and terminate */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the token count and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker of the parse*() functions.  Returns the next
+ * (type, lexeme) tuple, or (Datum)0 once all tokens have been
+ * returned, in which case the stored state is released.  The lexeme
+ * string of an entry is freed after it is returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *state = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char        *fields[2];
+   char        typebuf[16];
+   HeapTuple   tup;
+   Datum       res;
+
+   /* nothing left: clean up and signal completion */
+   if ( !( state->cur < state->len ) ) {
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   fields[0] = typebuf;
+   sprintf(typebuf, "%d", entry->type);
+   fields[1] = entry->lexem;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   res = TupleGetDatum(funcctx->slot, tup);
+
+   pfree(fields[1]);
+   state->cur++;
+   return res;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose oid is argument 0.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(1); 
+
+       ctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(ctx, PG_GETARG_OID(0), input );
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(ctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose name is argument 0.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0); 
+       text *input = PG_GETARG_TEXT_P(1); 
+       Oid prsid;
+
+       ctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( prsname );
+       prs_setup_firstcall(ctx, prsid, input );
+       PG_FREE_IF_COPY(prsname,0);
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(ctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 0 with the
+ * current parser.  If no current parser has been set, the one named
+ * "default" becomes current.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(0); 
+
+       ctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) {
+           text *defname = char2text("default");
+
+           current_parser_id = name2id_prs( defname );
+       }
+       prs_setup_firstcall(ctx, current_parser_id, input );
+       PG_FREE_IF_COPY(input,0);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(ctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * Build a headline for a document.
+ *
+ * Arguments: 0 - configuration oid, 1 - document text, 2 - query,
+ * 3 - optional options text (may be absent or a NULL pointer).
+ * The text is parsed with hlparsetext(), the headline function of the
+ * configuration's parser then processes the parsed words, and genhl()
+ * produces the result text.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo   *cfg;
+   text        *in;
+   QUERYTYPE   *query;
+   text        *opt;
+   WParserInfo *prsobj;
+   HLPRSTEXT   prs;
+   text        *out;
+
+   cfg = findcfg(PG_GETARG_OID(0));
+   in = PG_GETARG_TEXT_P(1);
+   query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   if ( PG_NARGS()>3 && PG_GETARG_POINTER(3) )
+       opt = PG_GETARG_TEXT_P(3);
+   else
+       opt = NULL;
+   prsobj = findprs(cfg->prs_id);
+
+   memset(&prs, 0, sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt )
+       PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /* NOTE(review): assumes the headline function always sets these - confirm */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+/*
+ * headline() variant taking the configuration by name (argument 0).
+ * The remaining arguments are passed through; a missing fourth
+ * argument is passed as a NULL pointer.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text    *cfgname = PG_GETARG_TEXT_P(0);
+   Datum   cfgid;
+   Datum   optarg;
+   Datum   out;
+
+   cfgid = ObjectIdGetDatum(name2id_cfg( cfgname ) );
+   if ( PG_NARGS()>3 )
+       optarg = PG_GETARG_DATUM(3);
+   else
+       optarg = PointerGetDatum(NULL);
+
+   out = DirectFunctionCall4(
+       headline,
+       cfgid,
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       optarg
+   );
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(out);   
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+/*
+ * headline() variant using the configuration returned by get_currcfg().
+ * Arguments: 0 - document text, 1 - query, 2 - optional options text;
+ * a missing third argument is passed as a NULL pointer.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum   cfgid;
+   Datum   optarg;
+   Datum   out;
+
+   cfgid = ObjectIdGetDatum(get_currcfg());
+   if ( PG_NARGS()>2 )
+       optarg = PG_GETARG_DATUM(2);
+   else
+       optarg = PointerGetDatum(NULL);
+
+   out = DirectFunctionCall4(
+       headline,
+       cfgid,
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       optarg
+   );
+   PG_RETURN_DATUM(out);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* Cached description of one parser registered in pg_ts_parser. */
+typedef struct {
+   Oid prs_id; /* oid of the pg_ts_parser row */
+   FmgrInfo start_info; /* prs_start function */
+   FmgrInfo getlexeme_info; /* prs_nexttoken function */
+   FmgrInfo end_info; /* prs_end function */
+   FmgrInfo headline_info; /* prs_headline function */
+   Oid lextype; /* oid of the prs_lextype function */
+   void *prs; /* parser state returned by the start function */
+} WParserInfo;
+
+/* load the parser with the given oid into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached entry for a parser, loading it on first use */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* drop all cached parser information */
+void   reset_prs(void);
+
+
+/* One token type description; arrays of these end at lexid == 0. */
+typedef struct {
+   int lexid; /* token type id */
+   char    *alias; /* short name */
+   char    *descr; /* human-readable description */
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * Return a palloc'd array of LASTNUM+1 LexDescr entries.  Entry k
+ * (0-based) gets lexid k+1 and pstrdup'd copies of tok_alias[k+1] and
+ * lex_descr[k+1].  The final entry only has lexid set (to 0) and acts
+ * as the terminator; its alias and descr fields are left unset.
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   int ntypes = LASTNUM;
+   LexDescr *list = (LexDescr*)palloc(sizeof(LexDescr)*(ntypes+1));
+   LexDescr *cur = list;
+   int id;
+
+   for(id=1; id<=ntypes; id++, cur++) {
+       cur->lexid = id;
+       cur->alias = pstrdup(tok_alias[id]);
+       cur->descr = pstrdup(lex_descr[id]);
+   }
+
+   /* cur now points at slot LASTNUM: mark the end of the list */
+   cur->lexid = 0;
+
+   PG_RETURN_POINTER(list);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+/*
+ * Start entry point of the default parser: hands the buffer pointer
+ * (argument 0) and its int32 length (argument 1) to start_parse_str()
+ * and returns a NULL pointer.
+ */
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char *buf = (char*)PG_GETARG_POINTER(0);
+   int buflen = PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+/*
+ * Fetch the next token: calls tsearch2_yylex(), then stores the current
+ * values of `token` and `tokenlen` through the pointers given as
+ * arguments 1 and 2, and returns the lexer's result as int32.
+ * Argument 0 is not read.
+ */
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tokout = (char**)PG_GETARG_POINTER(1);
+   int *lenout = (int*)PG_GETARG_POINTER(2);
+   int toktype;
+
+   /*
+    * Run the lexer before reading token/tokenlen (presumably the lexer
+    * is what sets them -- they are declared outside this file).
+    */
+   toktype = tsearch2_yylex();
+
+   *tokout = token;
+   *lenout = tokenlen;
+
+   PG_RETURN_INT32(toktype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+/*
+ * End entry point of the default parser: calls end_parse() and returns
+ * void.  The function arguments are not read.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse();
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class predicates.  In this file they are applied to
+ * prs->words[i].type.
+ * NOTE(review): the symbolic meaning of the numeric ids is not visible
+ * here -- presumably they match the token numbering in
+ * wordparser/deflex.h; confirm there before changing any value.
+ *
+ * LEAVETOKEN, COMPLEXTOKEN and ENDPUNCTOKEN are not referenced by
+ * hlCover() or prsd_headline() below.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+/*
+ * HLIDIGNORE: types whose words prsd_headline() marks with replace=1.
+ * NONWORDTOKEN: types that are not counted as words when measuring a
+ *   headline's length.
+ * NOENDTOKEN: types on which prsd_headline() avoids ending a headline.
+ */
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/*
+ * Slice of the headline word array that hlCover() hands to TS_execute()
+ * as the check value: `words` points at the first word of the slice and
+ * `len` is the number of words in it (see checkcondition_HL).
+ */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * Callback for TS_execute(): checkval is an hlCheck slice.  Returns true
+ * when any word in the slice has its `item` pointer equal to val,
+ * false otherwise.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *slice = (hlCheck*)checkval;
+   int idx = 0;
+
+   while ( idx < slice->len ) {
+       if ( slice->words[idx].item == val )
+           return true;
+       idx++;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next span of words ("cover") that satisfies the query.
+ *
+ * On entry *p is the word index to start searching from.  On success the
+ * function returns true with *p / *q set to the first / last word index
+ * of a span for which TS_execute() over words[*p..*q] is true.  It
+ * returns false when no such span is found.
+ *
+ * NOTE(review): *q==0 is used as the "nothing matched" marker, so a
+ * match located only at word index 0 is treated as no match.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /*
+    * Right edge: for every VAL item take its first occurrence at or after
+    * pos; *q becomes the largest of those indexes.
+    */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q==0 )
+       return false;
+
+   /*
+    * Left edge: for every VAL item take its last occurrence in [pos,*q],
+    * scanning backwards; *p becomes the smallest of those indexes.
+    */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       /* verify that the candidate span really satisfies the query */
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* not a real cover: retry starting one word further right */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+/*
+ * Headline entry point of the default parser.
+ *
+ * Arguments: 0 - HLPRSTEXT* with the parsed text (modified in place),
+ *            1 - text* with options, may be NULL,
+ *            2 - QUERYTYPE* with the query.
+ *
+ * Recognised options (keys compared case-insensitively): MaxWords,
+ * MinWords, ShortWord, StartSel, StopSel.  When options are given, an
+ * error is raised unless 0 < MinWords < MaxWords and ShortWord >= 0.
+ *
+ * The function picks the best cover reported by hlCover(), or the
+ * leading words of the text when there is no cover.  It flags the words
+ * of that span (in / selected / skip / replace) and fills in
+ * startsel / stopsel and their lengths.  Returns prs.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults; may be overridden from opt, as may the start and stop tags */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /*
+        * find cover len in words: curlen counts word tokens, poslen counts
+        * non-repeated query words, pose tracks the last index visited
+        */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       /*
+        * bestlen is -1 until a best cover exists, so the short-circuit
+        * keeps words[beste] from being read while beste is still -1
+        */
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           /* extend to the right until min_words is reached on an acceptable end word */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           /* shrink from the right, stopping at the first acceptable end word */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this cover when there is no best yet, when it has more query
+        * words and an acceptable end, or when it has an acceptable end and
+        * the current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: fall back to the first min_words words of the text */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag every word of the chosen span */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* default selection tags when the options did not set them */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom}}}}}}}
+    "http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">[http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/]
+    (check Development History for latest stable version !)
+    
+        tar -zxvf tsearch-v2.tar.gz
+        mv tsearch2 PGSQL_SRC/contrib/
+        cd PGSQL_SRC/contrib/tsearch2
+
+
+    If you are installing from PostgreSQL version 7.4 or higher,
+    you can skip those steps and just change to the
+    contrib/tsearch2 directory in the source tree and continue from
+    there.
+
+    Then continue with the regular building and installation
+    process
+    
+        gmake
+        gmake install
+        gmake installcheck
+
+
+    That is pretty much all you have to do, unless of course you
+    get errors. However, if you get those, you had better go check
+    with the mailing lists over at http://www.postgresql.org or
+    http://openfts.sourceforge.net/ since it's never failed for me.
+
+    The directory in contrib/ and the directory from the
+    archive are both called tsearch2. Tsearch2 is completely
+    incompatible with the previous version of tsearch. This means
+    that both versions can be installed into a single database, and
+    migration to the new version may be much easier.
+
+    NOTE: the previous version of tsearch found in the
+    contrib/tsearch directory is deprecated. Although it is still
+    available and included within PostgreSQL version 7.4, it will
+    be removed in version 7.5.
+
+    ADDING TSEARCH2 FUNCTIONALITY TO A DATABASE
+
+    We should create a database to use as an example for the
+    remainder of this file. We can call the database "ftstest". You
+    can create it from the command line like this:
+    
+        #createdb ftstest
+
+
+    If you thought installation was easy, this next bit is even
+    easier. Change to the PGSQL_SRC/contrib/tsearch2 directory and
+    type:
+    
+        psql ftstest < tsearch2.sql
+
+
+    The file "tsearch2.sql" holds all the wonderful little
+    goodies you need to do full text indexing. It defines numerous
+    functions and operators, and creates the needed tables in the
+    database. There will be 4 new tables created after running the
+    tsearch2.sql file : pg_ts_dict, pg_ts_parser, pg_ts_cfg,
+    pg_ts_cfgmap are added.
+
+    You can check out the tables if you like:
+    
+        #psql ftstest
+        ftstest=# \d
+                    List of relations
+         Schema |     Name     | Type  |  Owner
+        --------+--------------+-------+----------
+         public | pg_ts_cfg    | table | kopciuch
+         public | pg_ts_cfgmap | table | kopciuch
+         public | pg_ts_dict   | table | kopciuch
+         public | pg_ts_parser | table | kopciuch
+        (4 rows)
+
+
+    TYPES AND FUNCTIONS PROVIDED BY TSEARCH2
+
+    The first thing we can do is try out some of the types that
+    are provided for us. Lets look at the tsvector type provided
+    for us:
+    
+        SELECT 'Our first string used today'::tsvector;
+                        tsvector
+        ---------------------------------------
+         'Our' 'used' 'first' 'today' 'string'
+        (1 row)
+
+
+    The results are the words used within our string. Notice
+    they are not in any particular order. The tsvector type returns
+    a string of space separated words.
+    
+        SELECT 'Our first string used today first string'::tsvector;
+                        tsvector
+        ---------------------------------------
+         'Our' 'used' 'first' 'today' 'string'
+        (1 row)
+
+
+    Notice the results string has each unique word ('first' and
+    'string' only appear once in the tsvector value). Which of
+    course makes sense if you are searching the full text ... you
+    only need to know each unique word in the text.
+
+    Those examples were just casting a text field to that of
+    type tsvector. Lets check out one of the new functions created
+    by the tsearch2 module.
+
+    The function to_tsvector has 3 possible signatures:
+    
+        to_tsvector(oid, text);
+        to_tsvector(text, text);
+        to_tsvector(text);
+
+
+    We will use the second method using two text fields. The
+    overloaded methods provide us with a way to specify the way
+    the searchable text is broken up into words (Stemming process).
+    Right now we will specify the 'default' configuration. See the
+    section on TSEARCH2 CONFIGURATION to learn more about this.
+    
+        SELECT to_tsvector('default',
+                           'Our first string used today first string');
+                        to_tsvector
+        --------------------------------------------
+         'use':4 'first':2,6 'today':5 'string':3,7
+        (1 row)
+
+
+    The result returned from this function is of type tsvector.
+    The results came about by this reasoning: All of the words in
+    the text passed in are stemmed, or not used because they are
+    stop words defined in our configuration. Each lower case
+    morphed word is returned with all of its positions in the
+    text.
+
+    In this case the word "Our" is a stop word in the default
+    configuration. That means it will not be included in the
+    result. The word "first" is found at positions 2 and 6
+    (although "Our" is a stop word, its position is maintained).
+    The word(s) positioning is maintained exactly as in the
+    original string. The word "used" is morphed to the word "use"
+    based on the default configuration for word stemming, and is
+    found at position 4. The rest of the results follow the same
+    logic. Just a reminder again ... the order of the 'word'
+    position in the output is not in any kind of order. (ie 'use':4
+    appears first)
+
+    If you want to view the output of the tsvector fields
+    without their positions, you can do so with the function
+    "strip(tsvector)".
+    
+        SELECT strip(to_tsvector('default',
+                     'Our first string used today first string'));
+                    strip
+        --------------------------------
+         'use' 'first' 'today' 'string'
+
+
+    If you wish to know the number of unique words returned in
+    the tsvector you can do so by using the function
+    "length(tsvector)"
+    
+        SELECT length(to_tsvector('default',
+                      'Our first string used today first string'));
+         length
+        --------
+              4
+        (1 row)
+
+
+    Let's take a look at the function to_tsquery. It also has 3
+    signatures which follow the same rationale as the to_tsvector
+    function:
+    
+        to_tsquery(oid, text);
+        to_tsquery(text, text);
+        to_tsquery(text);
+
+
+    Lets try using the function with a single word :
+    
+        SELECT to_tsquery('default', 'word');
+         to_tsquery
+        -----------
+         'word'
+         (1 row)
+
+
+    I call the function the same way I would a to_tsvector
+    function, specifying the 'default' configuration for morphing,
+    and the result is the stemmed output 'word'.
+
+    Lets attempt to use the function with a string of multiple
+    words:
+    
+        SELECT to_tsquery('default', 'this is many words');
+        ERROR:  Syntax error
+
+
+    The function can not accept a space separated string. The
+    intention of the to_tsquery function is to return a type of
+    "tsquery" used for searching a tsvector field. What we need to
+    do is search for one to many words with some kind of logic (for
+    now simple boolean).
+    
+        SELECT to_tsquery('default', 'searching|sentence');
+              to_tsquery
+        ----------------------
+         'search' | 'sentenc'
+        (1 row)
+
+
+    Notice that the words are separated by the boolean logic
+    "OR", the text could contain boolean operators &,|,!,()
+    with their usual meaning.
+
+    You can not use words defined as being a stop word in your
+    configuration. The function will not fail ... you will just get
+    no result, and a NOTICE like this:
+    
+        SELECT to_tsquery('default', 'a|is&not|!the');
+        NOTICE:  Query contains only stopword(s)
+                 or doesn't contain lexem(s), ignored
+         to_tsquery
+        -----------
+        (1 row)
+
+
+    That is a beginning to using the types, and functions
+    defined in the tsearch2 module. There are numerous more
+    functions that I have not touched on. You can read through the
+    tsearch2.sql file built when compiling to get more familiar
+    with what is included.
+
+    INDEXING FIELDS IN A TABLE
+
+    The next stage is to add a full text index to an existing
+    table. In this example we already have a table defined as
+    follows:
+    
+        CREATE TABLE tblMessages
+        (
+                intIndex        int4,
+                strTopic        varchar(100),
+                strMessage      text
+        );
+
+
+    We are assuming there are several rows with some kind of
+    data in them. Any data will do, just do several inserts with
+    test strings for a topic, and a message. Here is some test data
+    I inserted. (Yes, I know it's completely useless stuff ;-) but
+    it will serve our purpose right now.)
+    
+        INSERT INTO tblMessages
+               VALUES ('1', 'Testing Topic', 'Testing message data input');
+        INSERT INTO tblMessages
+               VALUES ('2', 'Movie', 'Breakfast at Tiffany\'s');
+        INSERT INTO tblMessages
+               VALUES ('3', 'Famous Author', 'Stephen King');
+        INSERT INTO tblMessages
+               VALUES ('4', 'Political Topic',
+                            'Nelson Mandella is released from prison');
+        INSERT INTO tblMessages
+               VALUES ('5', 'Nursery rhyme phrase',
+                            'Little jack horner sat in a corner');
+        INSERT INTO tblMessages
+               VALUES ('6', 'Gettysburg address quotation',
+                            'Four score and seven years ago'
+                            ' our fathers brought forth on this'
+                            ' continent a new nation, conceived in'
+                            ' liberty and dedicated to the proposition'
+                            ' that all men are created equal');
+        INSERT INTO tblMessages
+               VALUES ('7', 'Classic Rock Bands',
+                            'Led Zeppelin Grateful Dead and The Sex Pistols');
+        INSERT INTO tblMessages
+               VALUES ('8', 'My birth address',
+                            '18 Sommervile road, Regina, Saskatchewan');
+        INSERT INTO tblMessages
+               VALUES ('9', 'Joke', 'knock knock : who\'s there?'
+                                    ' I will not finish this joke');
+        INSERT INTO tblMessages
+               VALUES ('10', 'Computer information',
+                             'My computer is a pentium III 400 mHz'
+                             ' with 192 megabytes of RAM');
+
+
+    The next stage is to create a special text index which we
+    will use for FTI, so we can search our table of messages for
+    words or a phrase. We do this using the SQL command:
+    
+        ALTER TABLE tblMessages ADD idxFTI tsvector;
+
+
+    Note that unlike traditional indexes, this is actually a new
+    field in the same table, which is then used (through the magic
+    of the tsearch2 operators and functions) by a special index we
+    will create in a moment.
+
+    The general rule for the initial insertion of data will
+    follow four steps:
+    
+    1. update table
+    2. vacuum full analyze
+    3. create index
+    4. vacuum full analyze
+
+
+    The data can be updated into the table, the vacuum full
+    analyze will reclaim unused space. The index can be created on
+    the table after the data has been inserted. Having the index
+    created prior to the update will slow down the process. It can
+    be done in that manner, this way is just more efficient. After
+    the index has been created on the table, vacuum full analyze is
+    run again to update postgres's statistics (ie having the index
+    take effect).
+    
+        UPDATE tblMessages SET idxFTI=to_tsvector('default', strMessage);
+        VACUUM FULL ANALYZE;
+
+
+    Note that this only inserts the field strMessage as a
+    tsvector, so if you want to also add strTopic to the
+    information stored, you should instead do the following, which
+    effectively concatenates the two fields into one before being
+    inserted into the table:
+    
+        UPDATE tblMessages
+            SET idxFTI=to_tsvector('default',coalesce(strTopic,'') ||' '|| coalesce(strMessage,''));
+        VACUUM FULL ANALYZE;
+
+
+    Using the coalesce function makes sure this
+    concatenation also works with NULL fields.
+
+    We need to create the index on the column idxFTI. Keep in
+    mind that the database will update the index when some action
+    is taken. In this case we _need_ the index (the whole point of
+    Full Text INDEXING ;-)), so don't worry about any indexing
+    overhead. We will create an index based on the gist function.
+    GiST is an index structure for Generalized Search Tree.
+    
+        CREATE INDEX idxFTI_idx ON tblMessages USING gist(idxFTI);
+        VACUUM FULL ANALYZE;
+
+
+    After you have converted all of your data and indexed the
+    column, you can select some rows to see what actually happened.
+    I will not display output here but you can play around
+    yourselves and see what happened.
+
+    The last thing to do is set up a trigger so every time a row
+    in this table is changed, the text index is automatically
+    updated. This is easily done using:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, strMessage);
+
+
+    Or if you are indexing both strMessage and strTopic you
+    should instead do:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE
+                tsearch2(idxFTI, strTopic, strMessage);
+
+
+    Before you ask, the tsearch2 function accepts multiple
+    fields as arguments so there is no need to concatenate the two
+    into one like we did before.
+
+    If you want to do something specific with columns, you may
+    write your very own trigger function using plpgsql or other
+    procedural languages (but not SQL, unfortunately) and use it
+    instead of tsearch2 trigger.
+
+    You could however call other stored procedures from within
+    the tsearch2 function. Let's say we want to create a function to
+    remove certain characters (like the @ symbol) from all
+    text.
+    
+       CREATE FUNCTION dropatsymbol(text) 
+                     RETURNS text AS 'select replace($1, \'@\', \' \');' LANGUAGE SQL;
+
+
+    Now we can use this function within the tsearch2 function on
+    the trigger.
+    
+      DROP TRIGGER tsvectorupdate ON tblmessages;
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, dropatsymbol, strMessage);
+        INSERT INTO tblmessages VALUES (69, 'Attempt for dropatsymbol', 'test@test.com');
+
+
+    If at this point you receive an error stating: ERROR: Can't
+    find tsearch config by locale
+
+    Do not worry. You have done nothing wrong. And tsearch2 is
+    not broken. All that has happened here is that the
+    configuration is setup to use a configuration based on the
+    locale of the server. All you have to do is change your default
+    configuration, or add a new one for your specific locale. See
+    the section on TSEARCH2 CONFIGURATION.
+    
+   SELECT * FROM tblmessages WHERE intindex = 69;
+
+         intindex |         strtopic         |  strmessage   |        idxfti
+        ----------+--------------------------+---------------+-----------------------   
+                69 | Attempt for dropatsymbol | test@test.com | 'test':1 'test.com':2
+        (1 row)
+Notice that the string content was passed through the stored
+procedure dropatsymbol. The '@' character was replaced with a
+single space ... and the output from the procedure was then stored
+in the tsvector column.
+
+    This could be useful for removing other characters from
+    indexed text, or any kind of preprocessing needed to be done on
+    the text prior to insertion into the index.
+
+    QUERYING A TABLE
+
+    There are some examples in the README.tsearch2 file for
+    querying a table. One major difference between tsearch and
+    tsearch2 is the operator ## is no longer available. Only the
+    operator @@ is defined, using the types tsvector on one side
+    and tsquery on the other side.
+
+    Let's search the indexed data for the word "Test". I indexed
+    based on the concatenation of the strTopic, and the
+    strMessage:
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'test'::tsquery;
+         intindex |   strtopic
+        ----------+---------------
+                1 | Testing Topic
+        (1 row)
+
+
+    The only result that matched was the row with a topic
+    "Testing Topic". Notice that the word I search for was all
+    lowercase. Let's see what happens when I query for uppercase
+    "Test".
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'Test'::tsquery;
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    We get zero rows returned. The reason is that when the
+    text was inserted, it was morphed according to my default
+    configuration (because of the call to to_tsvector in the UPDATE
+    statement). If there was no morphing done, and the tsvector
+    field(s) contained the word 'Test', a match would have been found.
+
+    Most likely the best way to query the field is to use the
+    to_tsquery function on the right hand side of the @@ operator
+    like this:
+    
+        SELECT intindex, strtopic FROM tblmessages
+               WHERE idxfti @@ to_tsquery('default', 'Test | Zeppelin');
+         intindex |      strtopic
+        ----------+--------------------
+                1 | Testing Topic
+                7 | Classic Rock Bands
+        (2 rows)
+
+
+    That query searched for all instances of "Test" OR
+    "Zeppelin". It returned two rows: the "Testing Topic" row, and
+    the "Classic Rock Bands" row. The to_tsquery function performed
+    the correct morphology upon the parameters, and searched the
+    tsvector field appropriately.
+
+    The last example here relates to searching for a phrase, for
+    example "minority report". This poses a problem with regard to
+    tsearch2, as it doesn't index phrases, only words. But there is
+    a way around which doesn't appear to have a significant impact
+    on query time, and that is to use a query such as the
+    following:
+    
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*men are created equal.*';
+         intindex |           strtopic
+        ----------+------------------------------
+                6 | Gettysburg address quotation
+        (1 row)
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*something that does not exist.*';
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    Of course if you're indexing both strTopic and strMessage, and
+    want to search for this phrase on both, then you will have to
+    get out the brackets and extend this query a little more.
+
+    TSEARCH2 CONFIGURATION
+
+    Some words such as "and", "the", and "who" are automatically
+    not indexed, since they belong to a pre-existing dictionary of
+    "Stop Words" which tsearch2 does not perform indexing on. If
+    someone needs to search for "The Who" in your database, they
+    are going to have a tough time coming up with any results,
+    since both are ignored in the indexes. But there is a
+    solution.
+
+    Let's say we want to add a word into the stop word list for
+    english stemming. We could edit the file
+    '/usr/local/pgsql/share/english.stop' and add a word to the
+    list. I edited mine to exclude my name from indexing:
+    
+    - Edit /usr/local/pgsql/share/english.stop
+    - Add 'andy' to the list
+    - Save the file.
+
+
+    When you connect to the database, the dict_init procedure is
+    run during initialization. And in my configuration it will read
+    the stop words from the file I just edited. If you were
+    connected to the DB while editing the stop words, you will need
+    to end the current session and re-connect. When you re-connect
+    to the database, 'andy' is no longer indexed:
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+        (1 row)
+
+
+    Originally I would get the result :
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+         'andi':1
+        (1 row)
+
+
+    But since I added it as a stop word, it would be ignored on
+    the indexing. The stop word added was used in the dictionary
+    "en_stem". If I were to use a different configuration such as
+    'simple', the results would be different. There are no stop
+    words for the simple dictionary. It will just convert to lower
+    case, and index every unique word.
+    
+        SELECT to_tsvector('simple', 'Andy andy The the in out');
+                     to_tsvector
+        -------------------------------------
+         'in':5 'out':6 'the':3,4 'andy':1,2
+        (1 row)
+
+
+    All this talk about which configuration to use is leading us
+    into the actual configuration of tsearch2. In the examples in
+    this document the configuration has always been specified when
+    using the tsearch2 functions:
+    
+        SELECT to_tsvector('default', 'Testing the default config');
+        SELECT to_tsvector('simple', 'Example of simple Config');
+
+
+    The pg_ts_cfg table holds each configuration you can use
+    with the tsearch2 functions. As you can see, the ts_name column
+    contains both the 'default' configurations, which are based on
+    a locale ('C' and 'ru_RU.KOI8-R'), and the 'simple'
+    configuration, which is not based on any locale.
+    
+        SELECT * from pg_ts_cfg;
+             ts_name     | prs_name |    locale
+        -----------------+----------+--------------
+         default         | default  | C
+         default_russian | default  | ru_RU.KOI8-R
+         simple          | default  |
+        (3 rows)
+
+
+    Each row in the pg_ts_cfg table contains the name of the
+    tsearch2 configuration, the name of the parser to use, and the
+    locale mapped to the configuration. There is only one parser to
+    choose from the table pg_ts_parser called 'default'. More
+    parsers could be written, but for our needs we will use the
+    default.
+
+    There are 3 configurations installed by tsearch2 initially.
+    If your locale is set to 'en_US' for example (like my laptop),
+    then as you can see there is currently no dictionary configured
+    to use with that locale. You can either set up a new
+    configuration or just use one that already exists. If I do not
+    specify which configuration to use in the to_tsvector function,
+    I receive the following error.
+    
+        SELECT to_tsvector('learning tsearch is like going to school');
+        ERROR:  Can't find tsearch config by locale
+
+
+    We will create a new configuration for use with the server
+    locale 'en_US'. The first step is to add a new configuration
+    into the pg_ts_cfg table. We will call the configuration
+    'default_english', with the default parser and use the locale
+    'en_US'.
+    
+        INSERT INTO pg_ts_cfg (ts_name, prs_name, locale)
+               VALUES ('default_english', 'default', 'en_US');
+
+
+    We have only declared that there is a configuration called
+    'default_english'. We need to set the configuration of how
+    'default_english' will work. The next step is creating a new
+    dictionary to use. The configuration of the dictionary is
+    completely different in tsearch2. In the prior versions to
+    make changes, you would have to re-compile your changes into
+    the tsearch.so. All of the configuration has now been moved
+    into the system tables created by executing the SQL code from
+    tsearch2.sql
+
+    Lets take a first look at the pg_ts_dict table
+    
+        ftstest=# \d pg_ts_dict
+                Table "public.pg_ts_dict"
+         Column      |  Type   | Modifiers
+        -----------------+---------+-----------
+         dict_name       | text    | not null
+         dict_init       | oid     |
+         dict_initoption | text    |
+         dict_lemmatize  | oid     | not null
+         dict_comment    | text    |
+        Indexes: pg_ts_dict_idx unique btree (dict_name)
+
+
+    The dict_name column is the name of the dictionary, for
+    example 'simple', 'en_stem' or 'ru_stem'. The dict_init column
+    is an OID of a stored procedure to run for initialization of
+    that dictionary, for example 'snb_en_init' or 'snb_ru_init'.
+    The dict_initoption column is used for options passed to the init
+    function for the stored procedure. In the cases of 'en_stem' or
+    'ru_stem' it is a path to a stopword file for that dictionary,
+    for example '/usr/local/pgsql/share/english.stop'. This is
+    however dictated by the dictionary. ISpell dictionaries may
+    require different options. The dict_lemmatize column is another
+    OID of a stored procedure to the function used to lemmatize,
+    for example 'snb_lemmatize'. The dict_comment column is just a
+    comment.
+
+    Next we will configure the use of a new dictionary based on
+    ISpell. We will assume you have ISpell installed on your
+    machine (in /usr/local/lib).
+
+    First lets register the dictionary(ies) to use from ISpell.
+    We will use the english dictionary from ISpell. We insert the
+    paths to the relevant ISpell dictionary (*.hash) and affixes
+    (*.aff) files. There seems to be some question as to which
+    ISpell files are to be used. I installed ISpell from the latest
+    sources on my computer. The installation installed the
+    dictionary files with an extension of *.hash. Some
+    installations install with an extension of *.dict. As far as I
+    know the two extensions are equivalent. So *.hash ==
+    *.dict.
+
+    We will also continue to use the english word stop file that
+    was installed for the en_stem dictionary. You could use a
+    different one if you like. The ISpell configuration is based on
+    the "ispell_template" dictionary installed by default with
+    tsearch2. We will use the OIDs to the stored procedures from
+    the row where the dict_name = 'ispell_template'.
+    
+        INSERT INTO pg_ts_dict
+               (SELECT 'en_ispell',
+                       dict_init,
+                       'DictFile="/usr/local/lib/english.hash",'
+                       'AffFile="/usr/local/lib/english.aff",'
+                       'StopFile="/usr/local/pgsql/share/english.stop"',
+                       dict_lexize
+                FROM pg_ts_dict
+                WHERE dict_name = 'ispell_template');
+
+
+    Next we need to set up the configuration for mapping the
+    dictionary use to the lexeme parsings. This will be done by
+    altering the pg_ts_cfgmap table. We will insert several rows,
+    specifying the use of the new dictionary we installed and
+    configured for use within tsearch2. There are several types of
+    lexemes for which we want to force the use of the ISpell
+    dictionary.
+    
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lhword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lpart_hword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lword', '{en_ispell,en_stem}');
+
+
+    We have just inserted 3 records to the configuration
+    mapping, specifying that the lexem types for "lhword,
+    lpart_hword and lword" are to be stemmed using the 'en_ispell'
+    dictionary we added into pg_ts_dict, when using the
+    configuration 'default_english' which we added to
+    pg_ts_cfg.
+
+    There are several other lexem types used that we do not need
+    to specify as using the ISpell dictionary. We can simply insert
+    values using the 'simple' stemming process dictionary.
+    
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'url', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'host', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'sfloat', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uri', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'int', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'float', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'email', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'word', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlpart_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'part_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlhword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'file', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uint', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'version', '{simple}');
+
+
+    Our addition of a configuration for 'default_english' is now
+    complete. We have successfully created a new tsearch2
+    configuration. At the same time we have also set the new
+    configuration to be our default for en_US locale.
+    
+        SELECT to_tsvector('default_english',
+                           'learning tsearch is like going to school');
+                           to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        SELECT to_tsvector('learning tsearch is like going to school');
+                            to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        (1 row)
+
+
+    In the case that you already have a configuration set for
+    the locale, and you are changing it to your new dictionary
+    configuration, you will have to set the old locale to NULL. If
+    we are using the 'C' locale then we would do this:
+    
+        UPDATE pg_ts_cfg SET locale=NULL WHERE locale = 'C';
+
+
+    That about wraps up the configuration of tsearch2. There is
+    much more you can do with the tables provided. This was just an
+    introduction to get things working rather quickly.
+
+    ADDING NEW DICTIONARIES TO TSEARCH2
+
+    To aid in the addition of new dictionaries to the tsearch2
+    module you can use another additional module in combination
+    with tsearch2. The gendict module is included into tsearch2
+    distribution and is available from gendict/ subdirectory.
+
+    I will not go into detail about installation and
+    instructions on how to use gendict to its fullest extent right
+    now. You can read the README.gendict ... it has all of the
+    instructions and information you will need.
+
+    BACKING UP AND RESTORING DATABASES THAT FEATURE
+    TSEARCH2
+
+    Believe it or not, this isn't as straight forward as it
+    should be, and you will have problems trying to backup and
+    restore any database which uses tsearch2 unless you take the
+    steps shown below. And before you ask, using pg_dumpall will
+    result in failure every time. These took a lot of trial and
+    error to get working, but the process as laid down below has
+    been used a dozen times now in live production environments so
+    it should work fine.
+
+    HOWEVER never rely on anyone else's instructions to backup
+    and restore a database system, always develop and understand
+    your own methodology, and test it numerous times before you
+    need to do it for real.
+
+    To Backup a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Backup any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        pg_dumpall -g > GLOBALobjects.sql
+
+
+    2) Backup the full database schema using pg_dump
+    
+        pg_dump -s DATABASE > DATABASEschema.sql
+
+
+    3) Backup the full database using pg_dump
+    
+        pg_dump -Fc DATABASE > DATABASEdata.tar
+
+
+    To Restore a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Create the blank database
+    
+        createdb DATABASE
+
+
+    2) Restore any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        psql DATABASE < GLOBALobjects.sql
+
+
+    3) Create the tsearch2 objects, functions and operators
+    
+        psql DATABASE < tsearch2.sql
+
+
+    4) Edit the backed up database schema and delete all SQL
+    commands which create tsearch2 related functions, operators and
+    data types, BUT NOT fields in table definitions that specify
+    tsvector types. If you're not sure what these are, they are the
+    ones listed in tsearch2.sql. Then restore the edited schema to
+    the database
+    
+        psql DATABASE < DATABASEschema.sql
+
+
+    5) Restore the data for the database
+    
+        pg_restore -N -a -d DATABASE DATABASEdata.tar
+
+
+    If you get any errors in step 4, it will most likely be
+    because you forgot to remove an object that was created in
+    tsearch2.sql. Any errors in step 5 will mean the database
+    schema was probably restored wrongly.
+  
+
+
+    http://www.postgresql.org or
+    
+    http://openfts.sourceforge.net/
+    since it's never failed for me.
+
+    The directory in the contrib/ and the directory from the
+    archive is called tsearch2. Tsearch2 is completely incompatible
+    with the previous version of tsearch. This means that both
+    versions can be installed into a single database, and migration
+    to the new version may be much easier.
+
+    NOTE: the previous version of tsearch found in the
+    contrib/tsearch directory is deprecated. Although it is still
+    available and included within PostgreSQL version 7.4, it will
+    be removed in version 7.5.
+
+    ADDING TSEARCH2 FUNCTIONALITY TO A DATABASE
+
+    We should create a database to use as an example for the
+    remainder of this file. We can call the database "ftstest". You
+    can create it from the command line like this:
+    
+        #createdb ftstest
+
+
+    If you thought installation was easy, this next bit is even
+    easier. Change to the PGSQL_SRC/contrib/tsearch2 directory and
+    type:
+    
+        psql ftstest < tsearch2.sql
+
+
+    The file "tsearch2.sql" holds all the wonderful little
+    goodies you need to do full text indexing. It defines numerous
+    functions and operators, and creates the needed tables in the
+    database. There will be 4 new tables created after running the
+    tsearch2.sql file: pg_ts_dict, pg_ts_parser, pg_ts_cfg, and
+    pg_ts_cfgmap.
+
+    You can check out the tables if you like:
+    
+        #psql ftstest
+        ftstest=# \d
+                    List of relations
+         Schema |     Name     | Type  |  Owner
+        --------+--------------+-------+----------
+         public | pg_ts_cfg    | table | kopciuch
+         public | pg_ts_cfgmap | table | kopciuch
+         public | pg_ts_dict   | table | kopciuch
+         public | pg_ts_parser | table | kopciuch
+        (4 rows)
+
+
+    TYPES AND FUNCTIONS PROVIDED BY TSEARCH2
+
+    The first thing we can do is try out some of the types that
+    are provided for us. Lets look at the tsvector type provided
+    for us:
+    
+        SELECT 'Our first string used today'::tsvector;
+                        tsvector
+        ---------------------------------------
+         'Our' 'used' 'first' 'today' 'string'
+        (1 row)
+
+
+    The results are the words used within our string. Notice
+    they are not in any particular order. The tsvector type returns
+    a string of space separated words.
+    
+        SELECT 'Our first string used today first string'::tsvector;
+                            tsvector
+        -----------------------------------------------
+         'Our' 'used' 'again' 'first' 'today' 'string'
+        (1 row)
+
+
+    Notice the results string has each unique word ('first' and
+    'string' only appear once in the tsvector value). Which of
+    course makes sense if you are searching the full text ... you
+    only need to know each unique word in the text.
+
+    Those examples were just casting a text field to that of
+    type tsvector. Lets check out one of the new functions created
+    by the tsearch2 module.
+
+    The function to_tsvector has 3 possible signatures:
+    
+        to_tsvector(oid, text);
+        to_tsvector(text, text);
+        to_tsvector(text);
+
+
+    We will use the second method using two text fields. The
+    overloaded methods provide us with a way to specify the way
+    the searchable text is broken up into words (Stemming process).
+    Right now we will specify the 'default' configuration. See the
+    section on TSEARCH2 CONFIGURATION to learn more about this.
+    
+        SELECT to_tsvector('default',
+                           'Our first string used today first string');
+                        to_tsvector
+        --------------------------------------------
+         'use':4 'first':2,6 'today':5 'string':3,7
+        (1 row)
+
+
+    The result returned from this function is of type tsvector.
+    The results came about by this reasoning: All of the words in
+    the text passed in are stemmed, or not used because they are
+    stop words defined in our configuration. Each lower case
+    morphed word is returned with all of the positions in the
+    text.
+
+    In this case the word "Our" is a stop word in the default
+    configuration. That means it will not be included in the
+    result. The word "first" is found at positions 2 and 6
+    (although "Our" is a stop word, its position is maintained).
+    The word(s) positioning is maintained exactly as in the
+    original string. The word "used" is morphed to the word "use"
+    based on the default configuration for word stemming, and is
+    found at position 4. The rest of the results follow the same
+    logic. Just a reminder again ... the order of the 'word'
+    position in the output is not in any kind of order. (ie 'use':4
+    appears first)
+
+    If you want to view the output of the tsvector fields
+    without their positions, you can do so with the function
+    "strip(tsvector)".
+    
+        SELECT strip(to_tsvector('default',
+                     'Our first string used today first string'));
+                    strip
+        --------------------------------
+         'use' 'first' 'today' 'string'
+
+
+    If you wish to know the number of unique words returned in
+    the tsvector you can do so by using the function
+    "length(tsvector)"
+    
+        SELECT length(to_tsvector('default',
+                      'Our first string used today first string'));
+         length
+        --------
+              4
+        (1 row)
+
+
+    Lets take a look at the function to_tsquery. It also has 3
+    signatures which follow the same rationale as the to_tsvector
+    function:
+    
+        to_tsquery(oid, text);
+        to_tsquery(text, text);
+        to_tsquery(text);
+
+
+    Lets try using the function with a single word :
+    
+        SELECT to_tsquery('default', 'word');
+         to_tsquery
+        -----------
+         'word'
+         (1 row)
+
+
+    I call the function the same way I would a to_tsvector
+    function, specifying the 'default' configuration for morphing,
+    and the result is the stemmed output 'word'.
+
+    Lets attempt to use the function with a string of multiple
+    words:
+    
+        SELECT to_tsquery('default', 'this is many words');
+        ERROR:  Syntax error
+
+
+    The function can not accept a space separated string. The
+    intention of the to_tsquery function is to return a type of
+    "tsquery" used for searching a tsvector field. What we need to
+    do is search for one to many words with some kind of logic (for
+    now simple boolean).
+    
+        SELECT to_tsquery('default', 'searching|sentence');
+              to_tsquery
+        ----------------------
+         'search' | 'sentenc'
+        (1 row)
+
+
+    Notice that the words are separated by the boolean logic
+    "OR", the text could contain boolean operators &,|,!,()
+    with their usual meaning.
+
+    You can not use words defined as being a stop word in your
+    configuration. The function will not fail ... you will just get
+    no result, and a NOTICE like this:
+    
+        SELECT to_tsquery('default', 'a|is&not|!the');
+        NOTICE:  Query contains only stopword(s)
+                 or doesn't contain lexem(s), ignored
+         to_tsquery
+        -----------
+        (1 row)
+
+
+    That is a beginning to using the types, and functions
+    defined in the tsearch2 module. There are numerous more
+    functions that I have not touched on. You can read through the
+    tsearch2.sql file built when compiling to get more familiar
+    with what is included.
+
+    INDEXING FIELDS IN A TABLE
+
+    The next stage is to add a full text index to an existing
+    table. In this example we already have a table defined as
+    follows:
+    
+        CREATE TABLE tblMessages
+        (
+                intIndex        int4,
+                strTopic        varchar(100),
+                strMessage      text
+        );
+
+
+    We are assuming there are several rows with some kind of
+    data in them. Any data will do, just do several inserts with
+    test strings for a topic, and a message. Here is some test data
+    I inserted. (yes I know it's completely useless stuff ;-) but
+    it will serve our purpose right now).
+    
+        INSERT INTO tblMessages
+               VALUES ('1', 'Testing Topic', 'Testing message data input');
+        INSERT INTO tblMessages
+               VALUES ('2', 'Movie', 'Breakfast at Tiffany\'s');
+        INSERT INTO tblMessages
+               VALUES ('3', 'Famous Author', 'Stephen King');
+        INSERT INTO tblMessages
+               VALUES ('4', 'Political Topic',
+                            'Nelson Mandella is released from prison');
+        INSERT INTO tblMessages
+               VALUES ('5', 'Nursery rhyme phrase',
+                            'Little jack horner sat in a corner');
+        INSERT INTO tblMessages
+               VALUES ('6', 'Gettysburg address quotation',
+                            'Four score and seven years ago'
+                            ' our fathers brought forth on this'
+                            ' continent a new nation, conceived in'
+                            ' liberty and dedicated to the proposition'
+                            ' that all men are created equal');
+        INSERT INTO tblMessages
+               VALUES ('7', 'Classic Rock Bands',
+                            'Led Zeppelin Grateful Dead and The Sex Pistols');
+        INSERT INTO tblMessages
+               VALUES ('8', 'My birth address',
+                            '18 Sommervile road, Regina, Saskatchewan');
+        INSERT INTO tblMessages
+               VALUES ('9', 'Joke', 'knock knock : who\'s there?'
+                                    ' I will not finish this joke');
+        INSERT INTO tblMessages
+               VALUES ('10', 'Computer information',
+                             'My computer is a pentium III 400 mHz'
+                             ' with 192 megabytes of RAM');
+
+
+    The next stage is to create a special text index which we
+    will use for FTI, so we can search our table of messages for
+    words or a phrase. We do this using the SQL command:
+    
+        ALTER TABLE tblMessages ADD idxFTI tsvector;
+
+
+    Note that unlike traditional indexes, this is actually a new
+    field in the same table, which is then used (through the magic
+    of the tsearch2 operators and functions) by a special index we
+    will create in a moment.
+
+    The general rule for the initial insertion of data will
+    follow four steps:
+    
+    1. update table
+    2. vacuum full analyze
+    3. create index
+    4. vacuum full analyze
+
+
+    The data can be updated into the table, the vacuum full
+    analyze will reclaim unused space. The index can be created on
+    the table after the data has been inserted. Having the index
+    created prior to the update will slow down the process. It can
+    be done in that manner, this way is just more efficient. After
+    the index has been created on the table, vacuum full analyze is
+    run again to update postgres's statistics (ie having the index
+    take effect).
+    
+        UPDATE tblMessages SET idxFTI=to_tsvector('default', strMessage);
+        VACUUM FULL ANALYZE;
+
+
+    Note that this only inserts the field strMessage as a
+    tsvector, so if you want to also add strTopic to the
+    information stored, you should instead do the following, which
+    effectively concatenates the two fields into one before being
+    inserted into the table:
+    
+        UPDATE tblMessages
+            SET idxFTI=to_tsvector('default',coalesce(strTopic,'') ||' '|| coalesce(strMessage,''));
+        VACUUM FULL ANALYZE;
+
+
+    Using the coalesce function makes sure this
+    concatenation also works with NULL fields.
+
+    We need to create the index on the column idxFTI. Keep in
+    mind that the database will update the index when some action
+    is taken. In this case we _need_ the index (The whole point of
+    Full Text INDEXING ;-)), so don't worry about any indexing
+    overhead. We will create an index based on the gist function.
+    GiST is an index structure for Generalized Search Tree.
+    
+        CREATE INDEX idxFTI_idx ON tblMessages USING gist(idxFTI);
+        VACUUM FULL ANALYZE;
+
+
+    After you have converted all of your data and indexed the
+    column, you can select some rows to see what actually happened.
+    I will not display output here but you can play around
+    yourselves and see what happened.
+
+    The last thing to do is set up a trigger so every time a row
+    in this table is changed, the text index is automatically
+    updated. This is easily done using:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, strMessage);
+
+
+    Or if you are indexing both strMessage and strTopic you
+    should instead do:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE
+                tsearch2(idxFTI, strTopic, strMessage);
+
+
+    Before you ask, the tsearch2 function accepts multiple
+    fields as arguments so there is no need to concatenate the two
+    into one like we did before.
+
+    If you want to do something specific with columns, you may
+    write your very own trigger function using plpgsql or other
+    procedural languages (but not SQL, unfortunately) and use it
+    instead of tsearch2 trigger.
+
+    You could however call other stored procedures from within
+    the tsearch2 function. Lets say we want to create a function to
+    remove certain characters (like the @ symbol) from all
+    text.
+    
+       CREATE FUNCTION dropatsymbol(text) 
+                     RETURNS text AS 'select replace($1, \'@\', \' \');' LANGUAGE SQL;
+
+
+    Now we can use this function within the tsearch2 function on
+    the trigger.
+    
+      DROP TRIGGER tsvectorupdate ON tblmessages;
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, dropatsymbol, strMessage);
+        INSERT INTO tblmessages VALUES (69, 'Attempt for dropatsymbol', 'test@test.com');
+
+
+    If at this point you receive an error stating: ERROR: Can't
+    find tsearch config by locale
+
+    Do not worry. You have done nothing wrong. And tsearch2 is
+    not broken. All that has happened here is that the
+    configuration is setup to use a configuration based on the
+    locale of the server. All you have to do is change your default
+    configuration, or add a new one for your specific locale. See
+    the section on TSEARCH2 CONFIGURATION.
+    
+   SELECT * FROM tblmessages WHERE intindex = 69;
+
+         intindex |         strtopic         |  strmessage   |        idxfti
+        ----------+--------------------------+---------------+-----------------------   
+                69 | Attempt for dropatsymbol | test@test.com | 'test':1 'test.com':2
+        (1 row)
+    Notice that the string content was passed through the stored
+    procedure dropatsymbol. The '@' character was replaced with a
+    single space ... and the output from the procedure was then stored
+    in the tsvector column.
+
+    This could be useful for removing other characters from
+    indexed text, or any kind of preprocessing needed to be done on
+    the text prior to insertion into the index.
+
+    QUERYING A TABLE
+
+    There are some examples in the README.tsearch2 file for
+    querying a table. One major difference between tsearch and
+    tsearch2 is the operator ## is no longer available. Only the
+    operator @@ is defined, using the types tsvector on one side
+    and tsquery on the other side.
+
+    Lets search the indexed data for the word "Test". I indexed
+    based on the concatenation of the strTopic, and the
+    strMessage:
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'test'::tsquery;
+         intindex |   strtopic
+        ----------+---------------
+                1 | Testing Topic
+        (1 row)
+
+
+    The only result that matched was the row with a topic
+    "Testing Topic". Notice that the word I search for was all
+    lowercase. Let's see what happens when I query for uppercase
+    "Test".
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'Test'::tsquery;
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    We get zero rows returned. The reason is because when the
+    text was inserted, it was morphed to my default configuration
+    (because of the call to to_tsvector in the UPDATE statement).
+    If there was no morphing done, and the tsvector field(s)
+    contained the word 'Test', a match would have been found.
+
+    Most likely the best way to query the field is to use the
+    to_tsquery function on the right hand side of the @@ operator
+    like this:
+    
+        SELECT intindex, strtopic FROM tblmessages
+               WHERE idxfti @@ to_tsquery('default', 'Test | Zeppelin');
+         intindex |      strtopic
+        ----------+--------------------
+                1 | Testing Topic
+                7 | Classic Rock Bands
+        (2 rows)
+
+
+    That query searched for all instances of "Test" OR
+    "Zeppelin". It returned two rows: the "Testing Topic" row, and
+    the "Classic Rock Bands" row. The to_tsquery function performed
+    the correct morphology upon the parameters, and searched the
+    tsvector field appropriately.
+
+    The last example here relates to searching for a phrase, for
+    example "minority report". This poses a problem with regard to
+    tsearch2, as it doesn't index phrases, only words. But there is
+    a way around which doesn't appear to have a significant impact
+    on query time, and that is to use a query such as the
+    following:
+    
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*men are created equal.*';
+         intindex |           strtopic
+        ----------+------------------------------
+                6 | Gettysburg address quotation
+        (1 row)
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*something that does not exist.*';
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    Of course if you're indexing both strTopic and strMessage, and
+    want to search for this phrase on both, then you will have to
+    get out the brackets and extend this query a little more.
+
+    TSEARCH2 CONFIGURATION
+
+    Some words such as "and", "the", and "who" are automatically
+    not indexed, since they belong to a pre-existing dictionary of
+    "Stop Words" which tsearch2 does not perform indexing on. If
+    someone needs to search for "The Who" in your database, they
+    are going to have a tough time coming up with any results,
+    since both are ignored in the indexes. But there is a
+    solution.
+
+    Lets say we want to add a word into the stop word list for
+    english stemming. We could edit the file
+    '/usr/local/pgsql/share/english.stop' and add a word to the
+    list. I edited mine to exclude my name from indexing:
+    
+    - Edit /usr/local/pgsql/share/english.stop
+    - Add 'andy' to the list
+    - Save the file.
+
+
+    When you connect to the database, the dict_init procedure is
+    run during initialization. And in my configuration it will read
+    the stop words from the file I just edited. If you were
+    connected to the DB while editing the stop words, you will need
+    to end the current session and re-connect. When you re-connect
+    to the database, 'andy' is no longer indexed:
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+        (1 row)
+
+
+    Originally I would get the result :
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+         'andi':1
+        (1 row)
+
+
+    But since I added it as a stop word, it would be ignored on
+    the indexing. The stop word added was used in the dictionary
+    "en_stem". If I were to use a different configuration such as
+    'simple', the results would be different. There are no stop
+    words for the simple dictionary. It will just convert to lower
+    case, and index every unique word.
+    
+        SELECT to_tsvector('simple', 'Andy andy The the in out');
+                     to_tsvector
+        -------------------------------------
+         'in':5 'out':6 'the':3,4 'andy':1,2
+        (1 row)
+
+
+    All this talk about which configuration to use is leading us
+    into the actual configuration of tsearch2. In the examples in
+    this document the configuration has always been specified when
+    using the tsearch2 functions:
+    
+        SELECT to_tsvector('default', 'Testing the default config');
+        SELECT to_tsvector('simple', 'Example of simple Config');
+
+
+    The pg_ts_cfg table holds each configuration you can use
+    with the tsearch2 functions. As you can see, the ts_name column
+    contains the 'default' configuration, which is based on the 'C'
+    locale, and the 'simple' configuration, which is not based on
+    any locale.
+    
+        SELECT * from pg_ts_cfg;
+             ts_name     | prs_name |    locale
+        -----------------+----------+--------------
+         default         | default  | C
+         default_russian | default  | ru_RU.KOI8-R
+         simple          | default  |
+        (3 rows)
+
+
+    Each row in the pg_ts_cfg table contains the name of the
+    tsearch2 configuration, the name of the parser to use, and the
+    locale mapped to the configuration. There is only one parser to
+    choose from the table pg_ts_parser called 'default'. More
+    parsers could be written, but for our needs we will use the
+    default.
+
+    There are 3 configurations installed by tsearch2 initially.
+    If your locale is set to 'en_US' for example (like my laptop),
+    then as you can see there is currently no dictionary configured
+    to use with that locale. You can either set up a new
+    configuration or just use one that already exists. If I do not
+    specify which configuration to use in the to_tsvector function,
+    I receive the following error.
+    
+        SELECT to_tsvector('learning tsearch is like going to school');
+        ERROR:  Can't find tsearch config by locale
+
+
+    We will create a new configuration for use with the server
+    locale 'en_US'. The first step is to add a new configuration
+    into the pg_ts_cfg table. We will call the configuration
+    'default_english', with the default parser and use the locale
+    'en_US'.
+    
+        INSERT INTO pg_ts_cfg (ts_name, prs_name, locale)
+               VALUES ('default_english', 'default', 'en_US');
+
+
+    We have only declared that there is a configuration called
+    'default_english'. We need to set the configuration of how
+    'default_english' will work. The next step is creating a new
+    dictionary to use. The configuration of the dictionary is
+    completely different in tsearch2. In the prior versions, to
+    make changes you would have to re-compile your changes into
+    the tsearch.so. All of the configuration has now been moved
+    into the system tables created by executing the SQL code from
+    tsearch2.sql.
+
+    Let's take a first look at the pg_ts_dict table:
+    
+        ftstest=# \d pg_ts_dict
+                Table "public.pg_ts_dict"
+         Column      |  Type   | Modifiers
+        -----------------+---------+-----------
+         dict_name       | text    | not null
+         dict_init       | oid     |
+         dict_initoption | text    |
+         dict_lemmatize  | oid     | not null
+         dict_comment    | text    |
+        Indexes: pg_ts_dict_idx unique btree (dict_name)
+
+
+    The dict_name column is the name of the dictionary, for
+    example 'simple', 'en_stem' or 'ru_stem'. The dict_init column
+    is an OID of a stored procedure to run for initialization of
+    that dictionary, for example 'snb_en_init' or 'snb_ru_init'.
+    The dict_initoption column holds the options passed to the
+    init function of that dictionary. In the cases of 'en_stem' or
+    'ru_stem' it is a path to a stopword file for that dictionary,
+    for example '/usr/local/pgsql/share/english.stop'. This is
+    however dictated by the dictionary. ISpell dictionaries may
+    require different options. The dict_lemmatize column is another
+    OID, that of the stored procedure used to lemmatize,
+    for example 'snb_lemmatize'. The dict_comment column is just a
+    comment.
+
+    Next we will configure the use of a new dictionary based on
+    ISpell. We will assume you have ISpell installed on your
+    machine (in /usr/local/lib).
+
+    First let's register the dictionary(ies) to use from ISpell.
+    We will use the english dictionary from ISpell. We insert the
+    paths to the relevant ISpell dictionary (*.hash) and affixes
+    (*.aff) files. There seems to be some question as to which
+    ISpell files are to be used. I installed ISpell from the latest
+    sources on my computer. The installation installed the
+    dictionary files with an extension of *.hash. Some
+    installations install with an extension of *.dict. As far as I
+    know the two extensions are equivalent. So *.hash ==
+    *.dict.
+
+    We will also continue to use the english word stop file that
+    was installed for the en_stem dictionary. You could use a
+    different one if you like. The ISpell configuration is based on
+    the "ispell_template" dictionary installed by default with
+    tsearch2. We will use the OIDs to the stored procedures from
+    the row where the dict_name = 'ispell_template'.
+    
+        INSERT INTO pg_ts_dict
+               (SELECT 'en_ispell',
+                       dict_init,
+                       'DictFile="/usr/local/lib/english.hash",'
+                       'AffFile="/usr/local/lib/english.aff",'
+                       'StopFile="/usr/local/pgsql/share/english.stop"',
+                       dict_lexize
+                FROM pg_ts_dict
+                WHERE dict_name = 'ispell_template');
+
+
+    Next we need to set up the configuration for mapping the
+    dictionary use to the lexeme types. This will be done by
+    altering the pg_ts_cfgmap table. We will insert several rows,
+    specifying the use of the new dictionary we installed and
+    configured for use within tsearch2. There are several types of
+    lexemes for which we want to force the use of the ISpell
+    dictionary.
+    
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lhword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lpart_hword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lword', '{en_ispell,en_stem}');
+
+
+    We have just inserted 3 records into the configuration
+    mapping, specifying that the lexeme types "lhword",
+    "lpart_hword" and "lword" are to be stemmed using the 'en_ispell'
+    dictionary we added into pg_ts_dict, when using the
+    configuration 'default_english' which we added to
+    pg_ts_cfg.
+
+    There are several other lexem types used that we do not need
+    to specify as using the ISpell dictionary. We can simply insert
+    values using the 'simple' stemming process dictionary.
+    
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'url', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'host', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'sfloat', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uri', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'int', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'float', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'email', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'word', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlpart_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'part_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlhword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'file', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uint', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'version', '{simple}');
+
+
+    Our addition of a configuration for 'default_english' is now
+    complete. We have successfully created a new tsearch2
+    configuration. At the same time we have also set the new
+    configuration to be our default for en_US locale.
+    
+        SELECT to_tsvector('default_english',
+                           'learning tsearch is like going to school');
+                           to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        SELECT to_tsvector('learning tsearch is like going to school');
+                            to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        (1 row)
+
+
+    In the case that you already have a configuration set for
+    the locale, and you are changing it to your new dictionary
+    configuration, you will have to set the old locale to NULL. If
+    we are using the 'C' locale then we would do this:
+    
+        UPDATE pg_ts_cfg SET locale=NULL WHERE locale = 'C';
+
+
+    That about wraps up the configuration of tsearch2. There is
+    much more you can do with the tables provided. This was just an
+    introduction to get things working rather quickly.
+
+    ADDING NEW DICTIONARIES TO TSEARCH2
+
+    To aid in the addition of new dictionaries to the tsearch2
+    module you can use another additional module in combination
+    with tsearch2. The gendict module is included in the tsearch2
+    distribution and is available from the gendict/ subdirectory.
+
+    I will not go into detail about installation and
+    instructions on how to use gendict to its fullest extent right
+    now. You can read the README.gendict ... it has all of the
+    instructions and information you will need.
+
+    BACKING UP AND RESTORING DATABASES THAT FEATURE
+    TSEARCH2
+
+    Believe it or not, this isn't as straightforward as it
+    should be, and you will have problems trying to back up and
+    restore any database which uses tsearch2 unless you take the
+    steps shown below. And before you ask, using pg_dumpall will
+    result in failure every time. These took a lot of trial and
+    error to get working, but the process as laid down below has
+    been used a dozen times now in live production environments so
+    it should work fine.
+
+    HOWEVER never rely on anyone else's instructions to back up
+    and restore a database system, always develop and understand
+    your own methodology, and test it numerous times before you
+    need to do it for real.
+
+    To Backup a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Backup any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        pg_dumpall -g > GLOBALobjects.sql
+
+
+    2) Backup the full database schema using pg_dump
+    
+        pg_dump -s DATABASE > DATABASEschema.sql
+
+
+    3) Backup the full database using pg_dump
+    
+        pg_dump -Fc DATABASE > DATABASEdata.tar
+
+
+    To Restore a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Create the blank database
+    
+        createdb DATABASE
+
+
+    2) Restore any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        psql DATABASE < GLOBALobjects.sql
+
+
+    3) Create the tsearch2 objects, functions and operators
+    
+        psql DATABASE < tsearch2.sql
+
+
+    4) Edit the backed up database schema and delete all SQL
+    commands which create tsearch2 related functions, operators and
+    data types, BUT NOT fields in table definitions that specify
+    tsvector types. If you're not sure what these are, they are the
+    ones listed in tsearch2.sql. Then restore the edited schema to
+    the database
+    
+        psql DATABASE < DATABASEschema.sql
+
+
+    5) Restore the data for the database
+    
+        pg_restore -N -a -d DATABASE DATABASEdata.tar
+
+
+    If you get any errors in step 4, it will most likely be
+    because you forgot to remove an object that was created in
+    tsearch2.sql. Any errors in step 5 will mean the database
+    schema was probably restored wrongly.
+  
+    http://openfts.sourceforge.net/
+    since it's never failed for me.
+
+    The directory in contrib/ and the directory from the
+    archive are both called tsearch2. Tsearch2 is completely
+    incompatible with the previous version of tsearch. This means
+    that both versions can be installed into a single database, and
+    migration to the new version may be much easier.
+
+    NOTE: the previous version of tsearch found in the
+    contrib/tsearch directory is deprecated. Although it is still
+    available and included within PostgreSQL version 7.4, it will
+    be removed in version 7.5.
+
+    ADDING TSEARCH2 FUNCTIONALITY TO A DATABASE
+
+    We should create a database to use as an example for the
+    remainder of this file. We can call the database "ftstest". You
+    can create it from the command line like this:
+    
+        #createdb ftstest
+
+
+    If you thought installation was easy, this next bit is even
+    easier. Change to the PGSQL_SRC/contrib/tsearch2 directory and
+    type:
+    
+        psql ftstest < tsearch2.sql
+
+
+    The file "tsearch2.sql" holds all the wonderful little
+    goodies you need to do full text indexing. It defines numerous
+    functions and operators, and creates the needed tables in the
+    database. There will be 4 new tables created after running the
+    tsearch2.sql file : pg_ts_dict, pg_ts_parser, pg_ts_cfg,
+    pg_ts_cfgmap are added.
+
+    You can check out the tables if you like:
+    
+        #psql ftstest
+        ftstest=# \d
+                    List of relations
+         Schema |     Name     | Type  |  Owner
+        --------+--------------+-------+----------
+         public | pg_ts_cfg    | table | kopciuch
+         public | pg_ts_cfgmap | table | kopciuch
+         public | pg_ts_dict   | table | kopciuch
+         public | pg_ts_parser | table | kopciuch
+        (4 rows)
+
+
+    TYPES AND FUNCTIONS PROVIDED BY TSEARCH2
+
+    The first thing we can do is try out some of the types that
+    are provided for us. Let's look at the tsvector type provided
+    for us:
+    
+        SELECT 'Our first string used today'::tsvector;
+                        tsvector
+        ---------------------------------------
+         'Our' 'used' 'first' 'today' 'string'
+        (1 row)
+
+
+    The results are the words used within our string. Notice
+    they are not in any particular order. The tsvector type returns
+    a string of space separated words.
+    
+        SELECT 'Our first string used again today first string'::tsvector;
+                            tsvector
+        -----------------------------------------------
+         'Our' 'used' 'again' 'first' 'today' 'string'
+        (1 row)
+
+
+    Notice the results string has each unique word ('first' and
+    'string' only appear once in the tsvector value). Which of
+    course makes sense if you are searching the full text ... you
+    only need to know each unique word in the text.
+
+    Those examples were just casting a text field to that of
+    type tsvector. Let's check out one of the new functions created
+    by the tsearch2 module.
+
+    The function to_tsvector has 3 possible signatures:
+    
+        to_tsvector(oid, text);
+        to_tsvector(text, text);
+        to_tsvector(text);
+
+
+    We will use the second method using two text fields. The
+    overloaded methods provide us with a way to specify the way
+    the searchable text is broken up into words (Stemming process).
+    Right now we will specify the 'default' configuration. See the
+    section on TSEARCH2 CONFIGURATION to learn more about this.
+    
+        SELECT to_tsvector('default',
+                           'Our first string used today first string');
+                        to_tsvector
+        --------------------------------------------
+         'use':4 'first':2,6 'today':5 'string':3,7
+        (1 row)
+
+
+    The result returned from this function is of type tsvector.
+    The results came about by this reasoning: All of the words in
+    the text passed in are stemmed, or not used because they are
+    stop words defined in our configuration. Each lower case
+    morphed word is returned with all of the positions in the
+    text.
+
+    In this case the word "Our" is a stop word in the default
+    configuration. That means it will not be included in the
+    result. The word "first" is found at positions 2 and 6
+    (although "Our" is a stop word, its position is maintained).
+    The word(s) positioning is maintained exactly as in the
+    original string. The word "used" is morphed to the word "use"
+    based on the default configuration for word stemming, and is
+    found at position 4. The rest of the results follow the same
+    logic. Just a reminder again ... the order of the 'word'
+    position in the output is not in any kind of order. (ie 'use':4
+    appears first)
+
+    If you want to view the output of the tsvector fields
+    without their positions, you can do so with the function
+    "strip(tsvector)".
+    
+        SELECT strip(to_tsvector('default',
+                     'Our first string used today first string'));
+                    strip
+        --------------------------------
+         'use' 'first' 'today' 'string'
+
+
+    If you wish to know the number of unique words returned in
+    the tsvector you can do so by using the function
+    "length(tsvector)"
+    
+        SELECT length(to_tsvector('default',
+                      'Our first string used today first string'));
+         length
+        --------
+              4
+        (1 row)
+
+
+    Let's take a look at the function to_tsquery. It also has 3
+    signatures which follow the same rationale as the to_tsvector
+    function:
+    
+        to_tsquery(oid, text);
+        to_tsquery(text, text);
+        to_tsquery(text);
+
+
+    Let's try using the function with a single word:
+    
+        SELECT to_tsquery('default', 'word');
+         to_tsquery
+        -----------
+         'word'
+         (1 row)
+
+
+    I call the function the same way I would a to_tsvector
+    function, specifying the 'default' configuration for morphing,
+    and the result is the stemmed output 'word'.
+
+    Let's attempt to use the function with a string of multiple
+    words:
+    
+        SELECT to_tsquery('default', 'this is many words');
+        ERROR:  Syntax error
+
+
+    The function can not accept a space separated string. The
+    intention of the to_tsquery function is to return a type of
+    "tsquery" used for searching a tsvector field. What we need to
+    do is search for one to many words with some kind of logic (for
+    now simple boolean).
+    
+        SELECT to_tsquery('default', 'searching|sentence');
+              to_tsquery
+        ----------------------
+         'search' | 'sentenc'
+        (1 row)
+
+
+    Notice that the words are separated by the boolean
+    "OR" operator. The text can contain the boolean operators
+    &, |, ! and () with their usual meaning.
+
+    You can not use words defined as being a stop word in your
+    configuration. The function will not fail ... you will just get
+    no result, and a NOTICE like this:
+    
+        SELECT to_tsquery('default', 'a|is&not|!the');
+        NOTICE:  Query contains only stopword(s)
+                 or doesn't contain lexem(s), ignored
+         to_tsquery
+        -----------
+        (1 row)
+
+
+    That is a beginning to using the types, and functions
+    defined in the tsearch2 module. There are numerous more
+    functions that I have not touched on. You can read through the
+    tsearch2.sql file built when compiling to get more familiar
+    with what is included.
+
+    INDEXING FIELDS IN A TABLE
+
+    The next stage is to add a full text index to an existing
+    table. In this example we already have a table defined as
+    follows:
+    
+        CREATE TABLE tblMessages
+        (
+                intIndex        int4,
+                strTopic        varchar(100),
+                strMessage      text
+        );
+
+
+    We are assuming there are several rows with some kind of
+    data in them. Any data will do, just do several inserts with
+    test strings for a topic, and a message. Here is some test data
+    I inserted. (yes I know it's completely useless stuff ;-) but
+    it will serve our purpose right now).
+    
+        INSERT INTO tblMessages
+               VALUES ('1', 'Testing Topic', 'Testing message data input');
+        INSERT INTO tblMessages
+               VALUES ('2', 'Movie', 'Breakfast at Tiffany\'s');
+        INSERT INTO tblMessages
+               VALUES ('3', 'Famous Author', 'Stephen King');
+        INSERT INTO tblMessages
+               VALUES ('4', 'Political Topic',
+                            'Nelson Mandella is released from prison');
+        INSERT INTO tblMessages
+               VALUES ('5', 'Nursery rhyme phrase',
+                            'Little jack horner sat in a corner');
+        INSERT INTO tblMessages
+               VALUES ('6', 'Gettysburg address quotation',
+                            'Four score and seven years ago'
+                            ' our fathers brought forth on this'
+                            ' continent a new nation, conceived in'
+                            ' liberty and dedicated to the proposition'
+                            ' that all men are created equal');
+        INSERT INTO tblMessages
+               VALUES ('7', 'Classic Rock Bands',
+                            'Led Zeppelin Grateful Dead and The Sex Pistols');
+        INSERT INTO tblMessages
+               VALUES ('8', 'My birth address',
+                            '18 Sommervile road, Regina, Saskatchewan');
+        INSERT INTO tblMessages
+               VALUES ('9', 'Joke', 'knock knock : who\'s there?'
+                                    ' I will not finish this joke');
+        INSERT INTO tblMessages
+               VALUES ('10', 'Computer information',
+                             'My computer is a pentium III 400 mHz'
+                             ' with 192 megabytes of RAM');
+
+
+    The next stage is to create a special text index which we
+    will use for FTI, so we can search our table of messages for
+    words or a phrase. We do this using the SQL command:
+    
+        ALTER TABLE tblMessages ADD idxFTI tsvector;
+
+
+    Note that unlike traditional indexes, this is actually a new
+    field in the same table, which is then used (through the magic
+    of the tsearch2 operators and functions) by a special index we
+    will create in a moment.
+
+    The general rule for the initial insertion of data will
+    follow four steps:
+    
+    1. update table
+    2. vacuum full analyze
+    3. create index
+    4. vacuum full analyze
+
+
+    The data can be updated into the table, the vacuum full
+    analyze will reclaim unused space. The index can be created on
+    the table after the data has been inserted. Having the index
+    created prior to the update will slow down the process. It can
+    be done in that manner, this way is just more efficient. After
+    the index has been created on the table, vacuum full analyze is
+    run again to update postgres's statistics (ie having the index
+    take effect).
+    
+        UPDATE tblMessages SET idxFTI=to_tsvector('default', strMessage);
+        VACUUM FULL ANALYZE;
+
+
+    Note that this only inserts the field strMessage as a
+    tsvector, so if you want to also add strTopic to the
+    information stored, you should instead do the following, which
+    effectively concatenates the two fields into one before being
+    inserted into the table:
+    
+        UPDATE tblMessages
+            SET idxFTI=to_tsvector('default',coalesce(strTopic,'') ||' '|| coalesce(strMessage,''));
+        VACUUM FULL ANALYZE;
+
+
+    Using the coalesce function makes sure this
+    concatenation also works with NULL fields.
+
+    We need to create the index on the column idxFTI. Keep in
+    mind that the database will update the index when some action
+    is taken. In this case we _need_ the index (The whole point of
+    Full Text INDEXING ;-)), so don't worry about any indexing
+    overhead. We will create an index based on the gist function.
+    GiST (Generalized Search Tree) is an index structure.
+    
+        CREATE INDEX idxFTI_idx ON tblMessages USING gist(idxFTI);
+        VACUUM FULL ANALYZE;
+
+
+    After you have converted all of your data and indexed the
+    column, you can select some rows to see what actually happened.
+    I will not display output here but you can play around
+    yourselves and see what happened.
+
+    The last thing to do is set up a trigger so every time a row
+    in this table is changed, the text index is automatically
+    updated. This is easily done using:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, strMessage);
+
+
+    Or if you are indexing both strMessage and strTopic you
+    should instead do:
+    
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE
+                tsearch2(idxFTI, strTopic, strMessage);
+
+
+    Before you ask, the tsearch2 function accepts multiple
+    fields as arguments so there is no need to concatenate the two
+    into one like we did before.
+
+    If you want to do something specific with columns, you may
+    write your very own trigger function using plpgsql or other
+    procedural languages (but not SQL, unfortunately) and use it
+    instead of tsearch2 trigger.
+
+    You could however call other stored procedures from within
+    the tsearch2 function. Let's say we want to create a function to
+    remove certain characters (like the @ symbol) from all
+    text.
+    
+       CREATE FUNCTION dropatsymbol(text) 
+                     RETURNS text AS 'select replace($1, \'@\', \' \');' LANGUAGE SQL;
+
+
+    Now we can use this function within the tsearch2 function on
+    the trigger.
+    
+      DROP TRIGGER tsvectorupdate ON tblmessages;
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, dropatsymbol, strMessage);
+        INSERT INTO tblmessages VALUES (69, 'Attempt for dropatsymbol', 'test@test.com');
+
+
+    If at this point you receive an error stating: ERROR: Can't
+    find tsearch config by locale
+
+    Do not worry. You have done nothing wrong. And tsearch2 is
+    not broken. All that has happened here is that the
+    configuration is set up to use a configuration based on the
+    locale of the server. All you have to do is change your default
+    configuration, or add a new one for your specific locale. See
+    the section on TSEARCH2 CONFIGURATION.
+    
+   SELECT * FROM tblmessages WHERE intindex = 69;
+
+         intindex |         strtopic         |  strmessage   |        idxfti
+        ----------+--------------------------+---------------+-----------------------   
+                69 | Attempt for dropatsymbol | test@test.com | 'test':1 'test.com':2
+        (1 row)
+Notice that the string content was passed through the stored
+procedure dropatsymbol. The '@' character was replaced with a
+single space ... and the output from the procedure was then stored
+in the tsvector column.
+
+    This could be useful for removing other characters from
+    indexed text, or any kind of preprocessing needed to be done on
+    the text prior to insertion into the index.
+
+    QUERYING A TABLE
+
+    There are some examples in the README.tsearch2 file for
+    querying a table. One major difference between tsearch and
+    tsearch2 is the operator ## is no longer available. Only the
+    operator @@ is defined, using the types tsvector on one side
+    and tsquery on the other side.
+
+    Let's search the indexed data for the word "Test". I indexed
+    based on the concatenation of the strTopic, and the
+    strMessage:
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'test'::tsquery;
+         intindex |   strtopic
+        ----------+---------------
+                1 | Testing Topic
+        (1 row)
+
+
+    The only result that matched was the row with a topic
+    "Testing Topic". Notice that the word I search for was all
+    lowercase. Let's see what happens when I query for uppercase
+    "Test".
+    
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'Test'::tsquery;
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    We get zero rows returned. This is because when the
+    text was inserted, it was morphed to my default configuration
+    (because of the call to to_tsvector in the UPDATE statement).
+    If there was no morphing done, and the tsvector field(s)
+    contained the word 'Test', a match would have been found.
+
+    Most likely the best way to query the field is to use the
+    to_tsquery function on the right hand side of the @@ operator
+    like this:
+    
+        SELECT intindex, strtopic FROM tblmessages
+               WHERE idxfti @@ to_tsquery('default', 'Test | Zeppelin');
+         intindex |      strtopic
+        ----------+--------------------
+                1 | Testing Topic
+                7 | Classic Rock Bands
+        (2 rows)
+
+
+    That query searched for all instances of "Test" OR
+    "Zeppelin". It returned two rows: the "Testing Topic" row, and
+    the "Classic Rock Bands" row. The to_tsquery function performed
+    the correct morphology upon the parameters, and searched the
+    tsvector field appropriately.
+
+    The last example here relates to searching for a phrase, for
+    example "minority report". This poses a problem with regard to
+    tsearch2, as it doesn't index phrases, only words. But there is
+    a way around which doesn't appear to have a significant impact
+    on query time, and that is to use a query such as the
+    following:
+    
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*men are created equal.*';
+         intindex |           strtopic
+        ----------+------------------------------
+                6 | Gettysburg address quotation
+        (1 row)
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*something that does not exist.*';
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+
+    Of course if you're indexing both strTopic and strMessage, and
+    want to search for this phrase on both, then you will have to
+    get out the brackets and extend this query a little more.
+
+    TSEARCH2 CONFIGURATION
+
+    Some words such as "and", "the", and "who" are automatically
+    not indexed, since they belong to a pre-existing dictionary of
+    "Stop Words" which tsearch2 does not perform indexing on. If
+    someone needs to search for "The Who" in your database, they
+    are going to have a tough time coming up with any results,
+    since both are ignored in the indexes. But there is a
+    solution.
+
+    Let's say we want to add a word into the stop word list for
+    english stemming. We could edit the file
+    '/usr/local/pgsql/share/english.stop' and add a word to the
+    list. I edited mine to exclude my name from indexing:
+    
+    - Edit /usr/local/pgsql/share/english.stop
+    - Add 'andy' to the list
+    - Save the file.
+
+
+    When you connect to the database, the dict_init procedure is
+    run during initialization. And in my configuration it will read
+    the stop words from the file I just edited. If you were
+    connected to the DB while editing the stop words, you will need
+    to end the current session and re-connect. When you re-connect
+    to the database, 'andy' is no longer indexed:
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+        (1 row)
+
+
+    Originally I would get the result :
+    
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+         'andi':1
+        (1 row)
+
+
+    But since I added it as a stop word, it would be ignored on
+    the indexing. The stop word added was used in the dictionary
+    "en_stem". If I were to use a different configuration such as
+    'simple', the results would be different. There are no stop
+    words for the simple dictionary. It will just convert to lower
+    case, and index every unique word.
+    
+        SELECT to_tsvector('simple', 'Andy andy The the in out');
+                     to_tsvector
+        -------------------------------------
+         'in':5 'out':6 'the':3,4 'andy':1,2
+        (1 row)
+
+
+    All this talk about which configuration to use is leading us
+    into the actual configuration of tsearch2. In the examples in
+    this document the configuration has always been specified when
+    using the tsearch2 functions:
+    
+        SELECT to_tsvector('default', 'Testing the default config');
+        SELECT to_tsvector('simple', 'Example of simple Config');
+
+
+    The pg_ts_cfg table holds each configuration you can use
+    with the tsearch2 functions. As you can see, the ts_name column
+    contains both the 'default' configuration, which is based on
+    the 'C' locale, and the 'simple' configuration, which is not
+    based on any locale.
+    
+        SELECT * from pg_ts_cfg;
+             ts_name     | prs_name |    locale
+        -----------------+----------+--------------
+         default         | default  | C
+         default_russian | default  | ru_RU.KOI8-R
+         simple          | default  |
+        (3 rows)
+
+
+    Each row in the pg_ts_cfg table contains the name of the
+    tsearch2 configuration, the name of the parser to use, and the
+    locale mapped to the configuration. There is only one parser to
+    choose from the table pg_ts_parser called 'default'. More
+    parsers could be written, but for our needs we will use the
+    default.
+
+    There are 3 configurations installed by tsearch2 initially.
+    If your locale is set to 'en_US' for example (like my laptop),
+    then as you can see there is currently no dictionary configured
+    to use with that locale. You can either set up a new
+    configuration or just use one that already exists. If I do not
+    specify which configuration to use in the to_tsvector function,
+    I receive the following error.
+    
+        SELECT to_tsvector('learning tsearch is like going to school');
+        ERROR:  Can't find tsearch config by locale
+
+
+    We will create a new configuration for use with the server
+    locale 'en_US'. The first step is to add a new configuration
+    into the pg_ts_cfg table. We will call the configuration
+    'default_english', with the default parser and use the locale
+    'en_US'.
+    
+        INSERT INTO pg_ts_cfg (ts_name, prs_name, locale)
+               VALUES ('default_english', 'default', 'en_US');
+
+
+    We have only declared that there is a configuration called
+    'default_english'. We need to set the configuration of how
+    'default_english' will work. The next step is creating a new
+    dictionary to use. The configuration of the dictionary is
+    completely different in tsearch2. In the prior versions to
+    make changes, you would have to re-compile your changes into
+    the tsearch.so. All of the configuration has now been moved
+    into the system tables created by executing the SQL code from
+    tsearch2.sql
+
+    Let's take a first look at the pg_ts_dict table
+    
+        ftstest=# \d pg_ts_dict
+                Table "public.pg_ts_dict"
+         Column      |  Type   | Modifiers
+        -----------------+---------+-----------
+         dict_name       | text    | not null
+         dict_init       | oid     |
+         dict_initoption | text    |
+         dict_lemmatize  | oid     | not null
+         dict_comment    | text    |
+        Indexes: pg_ts_dict_idx unique btree (dict_name)
+
+
+    The dict_name column is the name of the dictionary, for
+    example 'simple', 'en_stem' or 'ru_stem'. The dict_init column
+    is an OID of a stored procedure to run for initialization of
+    that dictionary, for example 'snb_en_init' or 'snb_ru_init'.
+    The dict_init option is used for options passed to the init
+    function for the stored procedure. In the cases of 'en_stem' or
+    'ru_stem' it is a path to a stopword file for that dictionary,
+    for example '/usr/local/pgsql/share/english.stop'. This is
+    however dictated by the dictionary. ISpell dictionaries may
+    require different options. The dict_lemmatize column is another
+    OID of a stored procedure to the function used to lemmatize,
+    for example 'snb_lemmatize'. The dict_comment column is just a
+    comment.
+
+    Next we will configure the use of a new dictionary based on
+    ISpell. We will assume you have ISpell installed on your
+    machine. (in /usr/local/lib)
+
+    First let's register the dictionary(ies) to use from ISpell.
+    We will use the english dictionary from ISpell. We insert the
+    paths to the relevant ISpell dictionary (*.hash) and affixes
+    (*.aff) files. There seems to be some question as to which
+    ISpell files are to be used. I installed ISpell from the latest
+    sources on my computer. The installation installed the
+    dictionary files with an extension of *.hash. Some
+    installations install with an extension of *.dict. As far as I
+    know the two extensions are equivalent. So *.hash ==
+    *.dict.
+
+    We will also continue to use the english word stop file that
+    was installed for the en_stem dictionary. You could use a
+    different one if you like. The ISpell configuration is based on
+    the "ispell_template" dictionary installed by default with
+    tsearch2. We will use the OIDs to the stored procedures from
+    the row where the dict_name = 'ispell_template'.
+    
+        INSERT INTO pg_ts_dict
+               (SELECT 'en_ispell',
+                       dict_init,
+                       'DictFile="/usr/local/lib/english.hash",'
+                       'AffFile="/usr/local/lib/english.aff",'
+                       'StopFile="/usr/local/pgsql/share/english.stop"',
+                       dict_lexize
+                FROM pg_ts_dict
+                WHERE dict_name = 'ispell_template');
+
+
+    Next we need to set up the configuration for mapping the
+    dictionary use to the lexeme parsings. This will be done by
+    altering the pg_ts_cfgmap table. We will insert several rows,
+    specifying the use of the new dictionary we installed and
+    configured for use within tsearch2. There are several types of
+    lexemes for which we want to force the use of the ISpell
+    dictionary.
+    
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lhword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lpart_hword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lword', '{en_ispell,en_stem}');
+
+
+    We have just inserted 3 records to the configuration
+    mapping, specifying that the lexeme types for "lhword,
+    lpart_hword and lword" are to be stemmed using the 'en_ispell'
+    dictionary we added into pg_ts_dict, when using the
+    configuration 'default_english' which we added to
+    pg_ts_cfg.
+
+    There are several other lexeme types used that we do not need
+    to specify as using the ISpell dictionary. We can simply insert
+    values using the 'simple' stemming process dictionary.
+    
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'url', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'host', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'sfloat', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uri', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'int', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'float', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'email', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'word', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlpart_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'part_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlhword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'file', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uint', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'version', '{simple}');
+
+
+    Our addition of a configuration for 'default_english' is now
+    complete. We have successfully created a new tsearch2
+    configuration. At the same time we have also set the new
+    configuration to be our default for en_US locale.
+    
+        SELECT to_tsvector('default_english',
+                           'learning tsearch is like going to school');
+                           to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        SELECT to_tsvector('learning tsearch is like going to school');
+                            to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        (1 row)
+
+
+    In the case that you already have a configuration set for
+    the locale, and you are changing it to your new dictionary
+    configuration, you will have to set the old locale to NULL. If
+    we are using the 'C' locale then we would do this:
+    
+        UPDATE pg_ts_cfg SET locale=NULL WHERE locale = 'C';
+
+
+    That about wraps up the configuration of tsearch2. There is
+    much more you can do with the tables provided. This was just an
+    introduction to get things working rather quickly.
+
+    ADDING NEW DICTIONARIES TO TSEARCH2
+
+    To aid in the addition of new dictionaries to the tsearch2
+    module you can use another additional module in combination
+    with tsearch2. The gendict module is included into tsearch2
+    distribution and is available from gendict/ subdirectory.
+
+    I will not go into detail about installation and
+    instructions on how to use gendict to its fullest extent right
+    now. You can read the README.gendict ... it has all of the
+    instructions and information you will need.
+
+    BACKING UP AND RESTORING DATABASES THAT FEATURE
+    TSEARCH2
+
+    Believe it or not, this isn't as straightforward as it
+    should be, and you will have problems trying to backup and
+    restore any database which uses tsearch2 unless you take the
+    steps shown below. And before you ask, using pg_dumpall will
+    result in failure every time. These took a lot of trial and
+    error to get working, but the process as laid down below has
+    been used a dozen times now in live production environments so
+    it should work fine.
+
+    HOWEVER never rely on anyone else's instructions to backup
+    and restore a database system, always develop and understand
+    your own methodology, and test it numerous times before you
+    need to do it for real.
+
+    To Backup a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Backup any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        pg_dumpall -g > GLOBALobjects.sql
+
+
+    2) Backup the full database schema using pg_dump
+    
+        pg_dump -s DATABASE > DATABASEschema.sql
+
+
+    3) Backup the full database using pg_dump
+    
+        pg_dump -Fc DATABASE > DATABASEdata.tar
+
+
+    To Restore a PostgreSQL database that uses the tsearch2
+    module:
+
+    1) Create the blank database
+    
+        createdb DATABASE
+
+
+    2) Restore any global database objects such as users and
+    groups (this step is usually only necessary when you will be
+    restoring to a virgin system)
+    
+        psql DATABASE < GLOBALobjects.sql
+
+
+    3) Create the tsearch2 objects, functions and operators
+    
+        psql DATABASE < tsearch2.sql
+
+
+    4) Edit the backed up database schema and delete all SQL
+    commands which create tsearch2 related functions, operators and
+    data types, BUT NOT fields in table definitions that specify
+    tsvector types. If you're not sure what these are, they are the
+    ones listed in tsearch2.sql. Then restore the edited schema to
+    the database
+    
+        psql DATABASE < DATABASEschema.sql
+
+
+    5) Restore the data for the database
+    
+        pg_restore -N -a -d DATABASE DATABASEdata.tar
+
+
+    If you get any errors in step 4, it will most likely be
+    because you forgot to remove an object that was created in
+    tsearch2.sql. Any errors in step 5 will mean the database
+    schema was probably restored wrongly.
+  
+
+
diff --git a/contrib/tsearch2/docs/tsearch2-guide.html b/contrib/tsearch2/docs/tsearch2-guide.html

new file mode 100644 (file)

index 0000000..2529480
--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-guide.html
@@ -0,0 +1,1057 @@
+
+
+
+
+tsearch2 guide
+
+
+The tsearch2 Guide
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Guide introduces the reader to the PostgreSQL tsearch2 module,
+version 2.
+More formal descriptions of the module's types and functions
+are provided in the tsearch2 Reference,
+which is a companion to this document.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+First we will examine the tsvector and tsquery types
+and how they are used to search documents;
+next, we will use them to build a simple search engine in SQL;
+and finally, we will study the internals of document conversion
+and how you might tune the internals to accommodate various searching needs.
+
+Once you have tsearch2 working with PostgreSQL,
+you should be able to run the examples here exactly as they are typed.
+
+
+Table of Contents
+
+Vectors and Queries
+A Simple Search Engine
+Ranking and Position Weights
+Casting Vectors and Queries
+Parsing and Lexing
+
+
+
+
+Vectors and Queries
+
+
+This section introduces
+the two data types upon which tsearch2 search engines are based,
+and illustrates their interaction using the simplest possible case.
+The complex examples we present later on
+are merely variations and elaborations of this basic mechanism.
+
+
+The tsearch2 module allows you to index documents by the words they contain,
+and then perform very efficient searches
+for documents that contain a given combination of words.
+Preparing your document index involves two steps:
+
+Making a list of the words each document contains.
+ You must reduce each document to a tsvector
+ which lists each word that appears in the document.
+ This process offers many options,
+ because there is no requirement
+ that you must copy words into the vector
+ exactly as they appear in the document.
+ For example,
+ many developers omit frequent and content-free stop words
+ like the to reduce the size of their index;
+ others reduce different forms of the same word
+ (forked, forking, forks)
+ to a common form (fork)
+ to make search results independent of tense and case.
+ Because words are very often stored in a modified form,
+ we use the special term lexemes
+ for the word forms we actually store in the vector.
+Creating an index of the documents by lexeme.
+ This is managed automatically by tsearch2
+ when you create a gist() index
+ on the tsvector column of a table,
+ which implements a form of the Berkeley
+ Generalized Search Tree.
+
+Once your documents are indexed,
+performing a search involves:
+
+Reducing the search terms to lexemes.
+ You must express each search you want to perform
+ as a tsquery specifying a boolean combination of lexemes.
+ Note that tsearch2 only finds exact matches
+ between the lexemes in your query and the ones in each vector —
+ even capitalization counts as a difference
+ (which is why all lexemes are usually kept lowercase).
+ So you must process search words the same way you processed document words;
+ if forking became fork in the document's tsvector,
+ then the search term forking must also become fork
+ or the search will not find the document.
+Retrieving the documents that match the query.
+ Running a SELECT ... WHERE
+ query @@ vector
+ on the table with the vector column
+ will return the documents that match your query.
+Presenting your results.
+ This final stage offers as many options
+ as turning documents into vectors.
+ You can order documents by how well they matched the search terms;
+ create a headline for each document
+ showing some of the phrases in which it uses the search terms;
+ and restrict the number of results retrieved.
+ You will of course want some way to identify each document,
+ so the user can ask for the full text of the ones he wants to read.
+
+And beyond deciding upon rules for turning documents into vectors
+and for presenting search results to users,
+you have to decide where to perform these operations —
+whether one database server
+will parse documents, perform searches, and prepare search results,
+or whether to spread the load of these operations across several machines.
+These are complicated design issues
+which we will explore later;
+in this section and the next,
+we will illustrate what can be accomplished
+using a single database server.
+
+The default tsearch2 configuration,
+which we will learn more about later,
+provides a good example of a process for reducing documents to vectors:
+
+
+=# SELECT set_curcfg('default')
+=# SELECT to_tsvector('The air smells of sea water.')
+             to_tsvector             
+-------------------------------------
+ 'air':2 'sea':5 'smell':3 'water':6
+(1 row)
+
+
+Note the complex relationship between this document and its vector.
+The vector lists only words from the document —
+spaces and punctuation have disappeared.
+Common words like the and of have been eliminated.
+The -s that makes smells a plural has been removed,
+leaving a lexeme that represents the word in its simplest form.
+And finally,
+though the vector remembers the positions in which each word appeared,
+it does not store the lexemes in that order.
+
+Keeping word positions in your vectors is optional, by the way.
+The positions are necessary for the tsearch2 ranking functions,
+which you can use to prioritize documents
+based on how often each document uses the search terms
+and whether they appear in close proximity.
+But if you do not perform ranking,
+or use your own process that ignores the word positions stored in the vector,
+then you can save space by stripping them from your vectors:
+
+
+=# SELECT strip(to_tsvector('The air smells of sea water.'))
+            strip            
+-----------------------------
+ 'air' 'sea' 'smell' 'water'
+(1 row)
+
+
+Now that we have a procedure for creating vectors,
+we can build an indexed table of vectors very simply:
+
+
+=# CREATE TABLE vectors ( vector tsvector )
+=# CREATE INDEX vector_index ON vectors USING gist(vector)
+=# INSERT INTO vectors VALUES (to_tsvector('The path forks here'))
+=# INSERT INTO vectors VALUES (to_tsvector('A crawl leads west'))
+=# INSERT INTO vectors VALUES (to_tsvector('The left fork leads northeast'))
+=# SELECT * FROM vectors
+                  vector                  
+------------------------------------------
+ 'fork':3 'path':2
+ 'lead':3 'west':4 'crawl':2
+ 'fork':3 'lead':4 'left':2 'northeast':5
+(3 rows)
+
+
+Now we can search this collection of document vectors
+using the @@ operator and a tsquery
+that specifies the combination of lexemes we are looking for.
+Note that while vectors simply list lexemes,
+queries always combine them with the operators
+‘&’ and,
+‘|’ or,
+and  ‘!’ not,
+plus parentheses for grouping.
+Some examples of the query syntax:
+
+
+ ‘find documents with the word forks in them’
+ 'forks'
+
+ ‘... with both forks and leads’
+ 'forks & leads'
+
+ ‘... with either forks or leads’
+ 'forks | leads'
+
+ ‘... with either forks or leads,
+  but without crawl’
+ '(forks|leads) & !crawl'
+
+The tsearch2 module
+provides a to_tsquery() function for creating queries
+that uses the same process as to_tsvector() uses
+to reduce words to lexemes.
+For instance,
+it will remove the -s from the plurals in the last example above:
+
+
+=# SELECT to_tsquery('(leads|forks) & !crawl')
+           to_tsquery           
+--------------------------------
+ ( 'lead' | 'fork' ) & !'crawl'
+(1 row)
+
+
+Again,
+this is critically important because the search operator @@
+only finds exact matches
+between the words in a query and the words in a vector;
+if the document vector lists the lexeme fork
+but the query looks for the plural form forks,
+the query would not match that document.
+Thanks to the symmetry between our process
+for producing vectors and queries, however,
+the above searches return correct results:
+
+
+=# SELECT * FROM vectors WHERE vector @@ to_tsquery('(leads|forks) & !crawl')
+                  vector                  
+------------------------------------------
+ 'fork':3 'path':2
+ 'fork':3 'lead':4 'left':2 'northeast':5
+(2 rows)
+
+
+You may want to try the other queries shown above,
+and perhaps invent some of your own.
+
+You should not include stop words in a query,
+since you cannot search for words you have discarded.
+If you throw out the word the when building vectors, for example,
+your index will obviously not know which documents included it.
+The to_tsquery() function will automatically detect this
+and issue a notice to warn you of this mistake:
+
+
+=# SELECT to_tsquery('the')
+NOTICE:  Query contains only stopword(s) or doesn't contain lexem(s), ignored
+ to_tsquery 
+------------
+ 
+(1 row)
+
+
+But if you ever build vectors and queries using your own routines,
+a possibility we will discuss later,
+then you will need to enforce this rule yourself.
+
+
+Now that you understand how vectors and queries work together,
+you are prepared to tackle many additional topics:
+how to distribute searching across many servers;
+how to customize the process
+by which tsearch2 turns documents and queries into lexemes,
+or use a process of your own;
+and how to sort and display search results to your users.
+But before discussing these detailed questions,
+we will build a simple search engine
+to see how easily its basic features work together.
+
+
+A Simple Search Engine
+
+
+In this section we build a simple search engine out of SQL functions
+that use the vector and query types described in the previous section.
+While this example is simpler
+than a search engine that has to interface with the outside world,
+it will illustrate the basic principles of building a search engine,
+and better prepare you for developing your own.
+
+Building a search engine involves only a few improvements
+upon the rudimentary vector searches described in the last section.
+
+Because the user wants to read documents, not vectors,
+ you must provide some way
+ for the full text of each document to be accessed —
+ either by storing the entire text of each document in the database,
+ or storing an identifier
+ like a URL, file name, or document routing number
+ that lets you fetch the document from other storage.
+You can make it easier for user interface code to refer to each document
+ by providing a unique identifier for each document,
+ perhaps with a SERIAL column.
+Search results should be ordered by relevance.
+ If you leave word positions in your vectors,
+ you can either have PostgreSQL ORDER your results
+ BY a ranking function,
+ or you can fetch the vectors yourself and perform your own sort.
+ If you choose to ignore word positions or strip them from your vectors,
+ you will have to determine relevance yourself,
+ using either the full text of the document
+ or other information about each document you may possess.
+For each document returned by a search,
+ you will usually want to display a summary called a headline
+ that shows short excerpts
+ illustrating how the document uses the query words.
+ Headlines are usually generated from the full text of the document,
+ not from position information in the tsvector,
+ since excerpts lacking stop words, punctuation, and suffixes
+ would not be comprehensible.
+ If you store the full text of each document in the database,
+ headlines can be generated very simply by a tsearch2 function.
+ If you store your documents elsewhere,
+ then you will either have to transmit each document to the database
+ every time you want to run the headline function on it,
+ or use your own headline code outside of the database.
+
+
+We can easily construct a simple search engine
+that accomplishes these goals.
+First we build a table that, for each document,
+stores a unique identifier, the full text of the document,
+and its tsvector:
+
+
+=# CREATE TABLE docs ( id SERIAL, doc TEXT, vector tsvector )
+=# CREATE INDEX docs_index ON docs USING gist(vector);
+
+
+Note that although searches will still work
+on tables where you have neglected
+to create a gist() index over your vectors,
+they will run much more slowly
+since they will have to compare the query
+against every document vector in the table.
+
+Because the table we have created
+stores each document in two different ways —
+both as text and as a vector —
+our INSERT statements must provide the document in both forms.
+While more advanced PostgreSQL programmers
+might accomplish this with a database trigger or rule,
+for this simple example we will use a small SQL function:
+
+
+=# CREATE FUNCTION insdoc(text) RETURNS void LANGUAGE sql AS
+  'INSERT INTO docs (doc, vector) VALUES ($1, to_tsvector($1));'
+
+
+Now, by calling insdoc() several times,
+we can populate our table with documents:
+
+
+=# SELECT insdoc('A low crawl over cobbles leads inward to the west.')
+=# SELECT insdoc('The canyon runs into a mass of boulders -- dead end.')
+=# SELECT insdoc('You are crawling over cobbles in a low passage.')
+=# SELECT insdoc('Cavernous passages lead east, north, and south.')
+=# SELECT insdoc('To the east a low wide crawl slants up.')
+=# SELECT insdoc('You are in the south side chamber.')
+=# SELECT insdoc('The passage here is blocked by a recent cave-in.')
+=# SELECT insdoc('You are in a splendid chamber thirty feet high.')
+
+
+Now we can build a search function.
+Its SELECT statement is based upon
+the same @@ operation illustrated in the previous section.
+But instead of returning matching vectors,
+we return for each document
+its SERIAL identifier, so the user can retrieve it later;
+a headline that illustrates its use of the search terms;
+and a ranking with which we also order the results.
+Our search operation can be coded as a single SELECT statement
+returning its own kind of table row,
+which we call a finddoc_t:
+
+
+=# CREATE TYPE finddoc_t AS (id INTEGER, headline TEXT, rank REAL)
+=# CREATE FUNCTION finddoc(text) RETURNS SETOF finddoc_t LANGUAGE sql AS '
+   SELECT id, headline(doc, q), rank(vector, q)
+     FROM docs, to_tsquery($1) AS q
+     WHERE vector @@ q ORDER BY rank(vector, q) DESC'
+
+
+This function is a rather satisfactory search engine.
+Here is one example search,
+after which the user fetches the top-ranking document itself;
+with similar commands you can try queries of your own:
+
+
+=# SELECT * FROM finddoc('passage|crawl')
+ id |                       headline                        | rank 
+----+-------------------------------------------------------+------
+  3 | <b>crawling</b> over cobbles in a low <b>passage</b>. | 0.19
+  1 | <b>crawl</b> over cobbles leads inward to the west.   |  0.1
+  4 | <b>passages</b> lead east, north, and south.          |  0.1
+  5 | <b>crawl</b> slants up.                               |  0.1
+  7 | <b>passage</b> here is blocked by a recent  cave-in.  |  0.1
+(5 rows)
+=# SELECT doc FROM docs WHERE id = 3
+                       doc                       
+-------------------------------------------------
+ You are crawling over cobbles in a low passage.
+(1 row)
+
+
+While by default the headline() function
+surrounds matching words with <b> and </b>
+in order to distinguish them from the surrounding text,
+you can provide options that change its behavior;
+consult the tsearch2 Reference for more details about
+Headline Functions.
+
+Though a search may match hundreds or thousands of documents,
+you will usually present only ten or twenty results to the user at a time.
+This can be most easily accomplished
+by limiting your query with a LIMIT
+and an OFFSET clause —
+to display results ten at a time, for example,
+you would generate your first page of results
+with LIMIT 10 OFFSET 0,
+your second page
+with LIMIT 10 OFFSET 10,
+your third page
+with LIMIT 10 OFFSET 20,
+and so forth.
+There are two problems with this approach, however.
+
+The first problem is the strain of running the query over again
+for every page of results the user views.
+For small document collections or lightly loaded servers,
+this may not be a problem;
+but the impact can be high
+when a search must repeatedly rank and sort
+the same ten thousand results
+on an already busy server.
+So instead of selecting only one page of results,
+you will probably use LIMIT and OFFSET
+to return a few dozen or few hundred results,
+which you can cache and display to the user one page at a time.
+Whether a result cache rewards your effort
+will depend principally on the behavior of your users —
+how often they even view the second page of results, for instance.
+
+The second issue solved by caching involves consistency.
+If the database is changing while the user browses their results,
+then documents might appear and disappear as they page through them.
+In some cases the user might even miss a particular result —
+perhaps the one they were looking for —
+if, say, its rank improves from 31st to 30th
+after they load results 21–30 but before they view results 31–40.
+While many databases are static or infrequently updated,
+and will not present this problem,
+users searching very dynamic document collections
+might benefit from the stable results that caches yield.
+
+
+Having seen the features of a search engine
+implemented entirely within the database,
+we will learn about some specific tsearch2 features.
+First we will look in more detail at document ranking.
+
+
+Ranking and Position Weights
+
+
+When we built our simple search engine,
+we used the rank() function to order our results.
+Here we describe tsearch2 ranking in more detail.
+
+
+There are two functions with which tsearch2 can rank search results.
+They both use the lexeme positions listed in the tsvector,
+so you cannot rank vectors
+from which these have been removed with strip().
+The rank() function existed in older versions of OpenFTS,
+and has the feature that you can assign different weights
+to words from different sections of your document.
+The rank_cd() uses a recent technique for weighting results
+but does not allow different weights to be given
+to different sections of your document.
+
+Both ranking functions allow you to specify,
+as an optional last argument,
+whether you want their results normalized —
+whether the rank returned should be adjusted for document length.
+Specifying a last argument of 0 (zero) makes no adjustment;
+1 (one) divides the document rank
+by the logarithm of the document length;
+and 2 divides it by the plain length.
+In all of these examples we omit this optional argument,
+which is the same as specifying zero —
+we are making no adjustment for document length.
+
+The rank_cd() function uses an experimental measurement
+called cover density ranking that rewards documents
+when they make frequent use of the search terms
+that are close together in the document.
+You can read about the algorithm in more detail
+in Clarke et al.,
+ “Relevance Ranking for One to Three Term Queries.”
+An optional first argument allows you to tune their formula;
+for details
+see the section on ranking
+in the Reference.
+
+The rank() function offers more flexibility
+because it pays attention to the weights
+with which you have labelled lexeme positions.
+Currently tsearch2 supports four different weight labels:
+'D', the default weight;
+and 'A', 'B', and 'C'.
+All vectors created with to_tsvector()
+assign the weight 'D' to each position,
+which as the default is not displayed when you print a vector out.
+
+If you want positions with weights other than 'D',
+you have two options:
+either you can author a vector directly through the ::tsvector
+casting operation,
+as described in the following section,
+which lets you give each position whichever weight you want;
+or you can pass a vector through the setweight() function
+which sets all of its position weights to a single value.
+An example of the latter:
+
+
+
+=# SELECT vector FROM docs WHERE id = 3
+                 vector                 
+----------------------------------------
+ 'low':8 'cobbl':5 'crawl':3 'passag':9
+(1 row)
+=# SELECT setweight(vector, 'A') FROM docs WHERE id = 3
+                 setweight                  
+--------------------------------------------
+ 'low':8A 'cobbl':5A 'crawl':3A 'passag':9A
+(1 row)
+
+
+
+Merely changing all of the weights in a vector is not very useful,
+of course,
+since this still results in all words having the same weight.
+But if we parse different parts of a document separately,
+giving each section its own weight,
+and then concatenate the vectors of each part into a single vector,
+the result can be very useful.
+We can construct a simple example
+in which document titles are given greater weight
+than text in the body of the document:
+
+
+
+=# CREATE TABLE tdocs ( id SERIAL, title TEXT, doc TEXT, vector tsvector )
+=# CREATE INDEX tdocs_index ON tdocs USING gist(vector);
+=# CREATE FUNCTION instdoc(text, text) RETURNS void LANGUAGE sql AS

+  'INSERT INTO tdocs (title, doc, vector)
+   VALUES ($1, $2, setweight(to_tsvector($1), ''A'') || to_tsvector($2));'
+
+
+
+Now words from a document title will be weighted differently
+than those in the main text
+if we provide the title and body as separate arguments:
+
+
+
+=# SELECT instdoc('Spendid Chamber',

+ 'The walls are frozen rivers of orange stone.')
+ instdoc 
+---------
+ 
+(1 row)
+=# SELECT vector FROM tdocs
+                                    vector                                    
+------------------------------------------------------------------------------
+ 'wall':4 'orang':9 'river':7 'stone':10 'frozen':6 'chamber':2A 'spendid':1A
+(1 row)
+
+
+
+Note that although the necessity is unusual,
+you can constrain search terms
+to only match words from certain sections
+by following them with a colon
+and a list of the sections in which the word can occur;
+by default this list is 'ABCD'
+so that search terms match words from all sections.
+For example,
+here we search for a word both generally,
+and then looking only for specific weights:
+
+
+
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:A')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:D')
+ title | doc 
+-------+-----
+(0 rows)
+
+
+
+
+
+

+Our examples so far use tsearch2 to parse our documents into vectors.
+When your application needs absolute control over vector content,
+you will want to use direct type casting,
+which is described in the next section.
+
+
+Casting Vectors and Queries
+
+

+While tsearch2 has powerful and flexible ways
+to process documents and turn them into document vectors,
+you will sometimes want to parse documents on your own
+and place the results directly in vectors.
+Here we show you how.
+
+
+In the preceding examples,
+we used the to_tsvector() function
+when we needed a document's text reduced to a document vector.
+We saw that the function stripped whitespace and punctuation,
+eliminated common words,
+and altered suffixes to reduce words to a common form.
+While these operations are often desirable,
+and while in the sections below
+we will gain precise control over this process,
+there are occasions on which
+you want to avoid the changes that to_tsvector() makes to text
+and specify explicitly the words that you want in your vectors.
+Or you may want to create queries directly
+rather than through to_tsquery().
+
+For example,
+you may have already developed your own routine
+for reducing your documents to searchable lexemes,
+and do not want your carefully generated terms altered
+by passing them through to_tsvector().
+Or you might be developing and debugging parsing routines of your own
+that you are not ready to load into the database.
+In either case,
+you will find that direct insertion is easily accomplished
+if you simply follow some simple rules.
+
+Vectors are created directly
+when you cast a string of whitespace separated lexemes
+to the tsvector type:
+
+
+
+=# select 'the only exit is the way you came in'::tsvector
+                     tsvector                     
+--------------------------------------------------
+ 'in' 'is' 'the' 'way' 'you' 'came' 'exit' 'only'
+(1 row)
+
+
+
+Notice that the conversion interpreted the string
+simply as a list of lexemes to be included in the vector.
+Their order was lost,
+as was the number of times each lexeme appeared.
+You must keep in mind that directly creating vectors with casting
+is not an alternate means of parsing;
+it is a way of directly entering lexemes into a vector without parsing.
+
+Queries can also be created through casting,
+if you separate lexemes with boolean operators
+rather than with whitespace.
+When creating your own vectors and queries,
+remember that the search operator @@
+finds only exact matches between query lexemes and vector lexemes
+—
+if they are not exactly the same string,
+they will not be considered a match.
+
+To include lexeme positions in your vector,
+write the positions exactly the way tsearch2 displays them
+when it prints vectors:
+by following each lexeme with a colon
+and a comma-separated list of integer positions.
+If you list a lexeme more than once,
+then all the positions listed for it are combined into a single list.
+For example,
+here are two ways of writing the same vector,
+depending on whether you mention ‘the’ twice
+or combine its positions into a list yourself:
+
+
+
+=# select 'the:1 only:2 exit:3 is:4 the:5 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+=# select 'the:1,5 only:2 exit:3 is:4 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+
+
+
+Things can get slightly tricky
+if you want to include apostrophes, backslashes, or spaces
+inside your lexemes
+(wanting to include either of the latter would be unusual,
+but they can be included if you follow the rules).
+The main problem is that the apostrophe and backslash
+are important both to PostgreSQL when it is interpreting a string,
+and to the tsvector conversion function.
+You may want to review section
+1.1.2.1,
+“String Constants”
+in the PostgreSQL documentation before proceeding.
+
+When you cast strings directly into vectors:
+
+The string is interpreted as a whitespace-separated list of lexemes,
+ any of which can be suffixed with a colon and a list of positions.
+A lexeme can be quoted by preceding it with an apostrophe,
+ in which case it runs until the next apostrophe;
+ otherwise a lexeme ends with the first whitespace or colon encountered.
+Any character preceded by a backslash,
+ including whitespace, the apostrophe, the colon, and the backslash itself,
+ loses its normal meaning and is treated as a letter.
+ Backslashes are effective
+ both inside and outside of apostrophe-quoted lexemes.
+A lexeme can be suffixed with a list of positions
+ by appending a colon and a comma-separated list of integers,
+ each of which can itself be followed by a letter
+ to designate a position weight
+ (position weights are described below).
+
+
+Here are some example strings,
+showing the lexeme you want to insert
+together with the string that the ::tsvector operator
+needs to see,
+and how you would type that string at the PostgreSQL prompt:
+
+
+
+For the lexeme...
+you need the string...
+which you can type as:
+
+nugget
+nugget
+'nugget'
+
+won't
+won't
+'won''t'
+
+pinin'
+pinin'
+'pinin'''
+
+'bout
+\'bout
+'\\''bout'
+
+white mist
+white\ mist
+'white\\ mist'
+
+or:
+'white mist'
+'''white mist'''
+
+won't budge
+won\'t\ budge
+'won\\''t\\ budge'
+
+or:
+'won\'t budge'
+'''won\\''t budge'''
+
+back\slashed
+back\\slashed
+'back\\\\slashed'
+
+
+Remember to use the quoted quoting shown at the right
+only when typing in strings as part of a PostgreSQL query.
+If you are providing strings through a library
+that automatically quotes them
+or provides them in binary form to PostgreSQL,
+then you can use the strings in the middle instead —
+suitably quoted in the language you are using, of course.
+
+Position weights are described below
+and can be written exactly as they will be displayed
+when you select a weighted vector:
+
+
+=# select 'weighty:1,3A trivial:2B,4'::tsvector
+           tsvector            
+-------------------------------
+ 'trivial':2B,4 'weighty':1,3A
+(1 row)
+
+
+
+Note that if you are composing SQL queries
+in a scripting language like Perl or Python,
+that itself considers quotes and backslashes special,
+then you may have another quoting layer to deal with
+on top of the two layers already shown above.
+In such cases you may want to write a function
+that performs the necessary quoting for you.
+
+

+Having seen how to create vectors of your own,
+it is time to learn how the native tsearch2 parser
+reduces documents to vectors.
+
+
+Parsing and Lexing
+
+

+The previous section
+described how you can bypass the parser provided by tsearch2
+and populate your table of documents
+with vectors of your own devising.
+But for those interested in the native tsearch2 facilities,
+we present here an overview of how it goes about
+reducing documents to vectors.
+
+
+The to_tsvector() function reduces documents to vectors
+in two stages.
+First, a parser breaks the input document
+into short sequences of text called tokens.
+Each token is usually a word, space, or piece of punctuation,
+though some parsers return larger and more exotic items
+like HTML tags as single tokens.
+Each token returned by the parser
+is either discarded
+or passed to a dictionary that converts it into a lexeme.
+The resulting lexemes are collected into a vector and returned.
+
+The choice of which parser and dictionaries to_tsvector() should use
+is controlled by your choice of configuration.
+The tsearch2 module comes with several configurations,
+and you can define more of your own;
+in fact the creation of a new configuration is illustrated below,
+in the section on position weights.
+
+To learn about parsing in more detail,
+we will study this example:
+
+
+=# select to_tsvector('default',

+     'The walls extend upward for well over 100 feet.')
+                       to_tsvector                        
+----------------------------------------------------------
+ '100':8 'feet':9 'wall':2 'well':6 'extend':3 'upward':4
+(1 row)
+
+
+Unlike the to_tsvector() calls used in the above examples,
+this one specifies the 'default' configuration explicitly.
+When we called to_tsvector() in earlier examples
+with only one argument,
+it used the current configuration,
+which is chosen automatically based on your LOCALE
+if that locale is mentioned in the pg_ts_cfg table
+(which is shown under the first bullet in the description below).
+If your locale is not listed in the table,
+your attempts to use the current configuration will return:
+
+
+ERROR:  Can't find tsearch2 config by locale
+
+
+You can always change the current configuration manually
+by calling the set_curcfg() function
+described in the section on
+Configurations
+in the Reference.
+
+Each configuration serves as an index into two different tables:
+in pg_ts_cfg it determines
+which parser will break our text into tokens,
+and in pg_ts_cfgmap
+it directs each token to a dictionary for processing.
+The steps in detail are:
+
+
+
+First, our text is parsed,
+using the parser listed for our configuration in the pg_ts_cfg table.
+We are using the 'default' configuration,
+so the table tells us to use the 'default' parser:
+
+
+=# SELECT * FROM pg_ts_cfg WHERE ts_name = 'default'
+ ts_name | prs_name | locale 
+---------+----------+--------
+ default | default  | C
+(1 row)
+
+
+So our text will be parsed as though we had called:
+
+
+=# select * from parse('default',

+     'The walls extend upward for well over 100 feet.')
+
+
+This breaks the text into a list of tokens
+which are each labelled with an integer type:
+
+The₁♦₁₂
+walls₁♦₁₂
+extend₁♦₁₂
+upward₁♦₁₂
+for₁♦₁₂
+well₁♦₁₂
+over₁♦₁₂
+100₂₂♦₁₂
+feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')

+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and

+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+}

diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html

new file mode 100644 (file)

index 0000000..df0faa4


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-ref.html
@@ -0,0 +1,448 @@
+
+
+
+
+tsearch2 reference
+
+
+The tsearch2 Reference
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Reference documents the user types and functions
+of the tsearch2 module for PostgreSQL.
+An introduction to the module is provided
+by the tsearch2 Guide,
+a companion document to this one.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+Vectors and Queries
+
+Vectors and queries both store lexemes,
+but for different purposes.
+A tsvector stores the lexemes
+of the words that are parsed out of a document,
+and can also remember the position of each word.
+A tsquery specifies a boolean condition among lexemes.
+
+Any of the following functions with a configuration argument
+can use either an integer id or textual ts_name
+to select a configuration;
+if the option is omitted, then the current configuration is used.
+For more information on the current configuration,
+read the next section on Configurations.
+
+Vector Operations
+
+
+
+ to_tsvector( [configuration,]

+ document TEXT) RETURNS tsvector
+
+ Parses a document into tokens,
+ reduces the tokens to lexemes,
+ and returns a tsvector which lists the lexemes
+ together with their positions in the document.
+ For the best description of this process,
+ see the section on Parsing and Lexing
+ in the accompanying tsearch2 Guide.
+
+ strip(vector tsvector) RETURNS tsvector
+
+ Return a vector which lists the same lexemes
+ as the given vector,
+ but which lacks any information
+ about where in the document each lexeme appeared.
+ While the returned vector is thus useless for relevance ranking,
+ it will usually be much smaller.
+
+ setweight(vector tsvector, letter) RETURNS tsvector
+
+ This function returns a copy of the input vector
+ in which every location has been labelled
+ with either the letter
+ 'A', 'B', or 'C',
+ or the default label 'D'
+ (which is the default with which new vectors are created,
+ and as such is usually not displayed).
+ These labels are retained when vectors are concatenated,
+ allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+
+ vector1 || vector2
+
+ concat(vector1 tsvector, vector2 tsvector)

+ RETURNS tsvector
+
+ Returns a vector which combines the lexemes and position information
+ in the two vectors given as arguments.
+ Position weight labels (described in the previous paragraph)
+ are retained intact during the concatenation.
+ This has at least two uses.
+ First,
+ if some sections of your document
+ need to be parsed with different configurations than others,
+ you can parse them separately
+ and concatenate the resulting vectors into one.
+ Second,
+ you can weight words from some sections of your document
+ more heavily than those from others by:
+ parsing the sections into separate vectors;
+ assigning the vectors different position labels
+ with the setweight() function;
+ concatenating them into a single vector;
+ and then providing a weights argument
+ to the rank() function
+ that assigns different weights to positions with different labels.
+
+ tsvector_size(vector tsvector) RETURNS INT4
+
+ Returns the number of lexemes stored in the vector.
+
+ text::tsvector RETURNS tsvector
+
+ Directly casting text to a tsvector
+ allows you to directly inject lexemes into a vector,
+ with whatever positions and position weights you choose to specify.
+ The text should be formatted
+ like the vector would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Query Operations
+
+
+
+ to_tsquery( [configuration,]

+ querytext text) RETURNS tsquery
+
+ Parses a query,
+ which should be single words separated by the boolean operators
+ “&” and,
+ “|” or,
+ and “!” not,
+ which can be grouped using parentheses.
+ Each word is reduced to a lexeme using the current
+ or specified configuration.
+
+
+ querytree(query tsquery) RETURNS text
+
+ This might return a textual representation of the given query.
+
+ text::tsquery RETURNS tsquery
+
+ Directly casting text to a tsquery
+ allows you to directly inject lexemes into a query,
+ with whatever positions and position weight flags you choose to specify.
+ The text should be formatted
+ like the query would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Configurations
+
+A configuration specifies all of the equipment necessary
+to transform a document into a tsvector:
+the parser that breaks its text into tokens,
+and the dictionaries which then transform each token into a lexeme.
+Every call to to_tsvector() (described above)
+uses a configuration to perform its processing.
+Three configurations come with tsearch2:
+
+
+default — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the simple dictionary for all others.
+default_russian — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the ru_stem Russian Snowball dictionary for all others.
+simple — Processes both words and numbers
+ with the simple dictionary,
+ which neither discards any stop words nor alters them.
+
+
+The tsearch2 module initially chooses your current configuration
+by looking for your current locale in the locale field
+of the pg_ts_cfg table described below.
+You can manipulate the current configuration yourself with these functions:
+
+
+
+ set_curcfg( id INT | ts_name TEXT

+  ) RETURNS VOID
+
+ Set the current configuration used by to_tsvector
+ and to_tsquery.
+
+ show_curcfg() RETURNS INT4
+
+ Returns the integer id of the current configuration.
+
+
+
+Each configuration is defined by a record in the pg_ts_cfg table:
+
+create table pg_ts_cfg (
+   id      int not  null primary key,
+   ts_name     text not null,
+   prs_name    text not null,
+   locale      text
+);
+
+The id and ts_name are unique values
+which identify the configuration;
+the prs_name specifies which parser the configuration uses.
+Once this parser has split document text into tokens,
+the type of each resulting token —
+or, more specifically, the type's lex_alias
+as specified in the parser's lexem_type() table —
+is searched for together with the configuration's ts_name
+in the pg_ts_cfgmap table:
+
+create table pg_ts_cfgmap (
+   ts_name     text not null,
+   lex_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,lex_alias)
+);
+
+Those tokens whose types are not listed are discarded.
+The remaining tokens are assigned integer positions,
+starting with 1 for the first token in the document,
+and turned into lexemes with the help of the dictionaries
+whose names are given in the dict_name array for their type.
+These dictionaries are tried in order,
+stopping either with the first one to return a lexeme for the token,
+or discarding the token if no dictionary returns a lexeme for it.
+
+Parsers
+
+Each parser is defined by a record in the pg_ts_parser table:
+
+create table pg_ts_parser (
+   prs_id      int not null primary key,
+   prs_name    text not null,
+   prs_start   oid not null,
+   prs_getlexem    oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+);
+
+The prs_id and prs_name uniquely identify the parser,
+while prs_comment usually describes its name and version
+for the reference of users.
+The other items identify the low-level functions
+which make the parser operate,
+and are only of interest to someone writing a parser of their own.
+
+The tsearch2 module comes with one parser named default
+which is suitable for parsing most plain text and HTML documents.
+
+Each parser argument below
+must designate a parser with either an integer prs_id
+or a textual prs_name;
+the current parser is used when this argument is omitted.
+
+
+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID
+
+ Selects a current parser
+ which will be used when any of the following functions
+ are called without a parser as an argument.
+
+ CREATE FUNCTION lexem_type(

+  [ parser ]
+  ) RETURNS SETOF lexemtype
+
+ Returns a table which defines and describes
+ each kind of token the parser may produce as output.
+ For each token type the table gives the lexid
+ with which the parser will label each token of that type,
+ the alias which names the token type,
+ and a short description descr for the user to read.
+
+ CREATE FUNCTION parse(

+  [ parser, ] document TEXT
+  ) RETURNS SETOF lexemtype
+
+ Parses the given document and returns a series of records,
+ one for each token produced by parsing.
+ Each token includes a lexid giving its type
+ and a lexem which gives its content.
+
+
+Dictionaries
+
+Dictionaries take textual tokens as input,
+usually those produced by a parser,
+and return lexemes which are usually some reduced form of the token.
+Among the dictionaries which come installed with tsearch2 are:
+
+
+simple simply folds uppercase letters to lowercase
+ before returning the word.
+en_stem runs an English Snowball stemmer on each word
+ that attempts to reduce the various forms of a verb or noun
+ to a single recognizable form.
+ru_stem runs a Russian Snowball stemmer on each word.
+
+
+Each dictionary is defined by an entry in the pg_ts_dict table:
+
+CREATE TABLE pg_ts_dict (
+   dict_id     int not null primary key,
+   dict_name   text not null,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lemmatize  oid not null,
+   dict_comment    text
+);
+
+The dict_id and dict_name
+serve as unique identifiers for the dictionary.
+The meaning of the dict_initoption varies among dictionaries,
+but for the built-in Snowball dictionaries
+it specifies a file from which stop words should be read.
+The dict_comment is a human-readable description of the dictionary.
+The other fields are internal function identifiers
+useful only to developers trying to implement their own dictionaries.
+
+The argument named dictionary
+in each of the following functions
+should be either an integer dict_id or a textual dict_name
+identifying which dictionary should be used for the operation;
+if omitted then the current dictionary is used.
+
+
+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID
+
+ Selects a current dictionary for use by functions
+ that do not select a dictionary explicitly.
+
+ CREATE FUNCTION lexize(

+ [ dictionary, ] word text)
+ RETURNS TEXT[]
+
+ Reduces a single word to a lexeme.
+ Note that lexemes are arrays of zero or more strings,
+ since in some languages there might be several base words
+ from which an inflected form could arise.
+
+
+Ranking
+
+Ranking attempts to measure how relevant documents are to particular queries
+by inspecting the number of times each search word appears in the document,
+and whether different search terms occur near each other.
+Note that this information is only available in unstripped vectors —
+ranking functions will only return a useful result
+for a tsvector which still has position information!
+
+Both of these ranking functions
+take an integer normalization option
+that specifies whether a document's length should impact its rank.
+This is often desirable,
+since a hundred-word document with five instances of a search word
+is probably more relevant than a thousand-word document with five instances.
+The option can have the values:
+
+
+0 (the default) ignores document length.
+1 divides the rank by the logarithm of the length.
+2 divides the rank by the length itself.
+
+
+The two ranking functions currently available are:
+
+
+
+ CREATE FUNCTION rank(

+  [ weights float4[], ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS,
+ and offers the ability to weight word instances more heavily
+ depending on how you have classified them.
+ The weights specify how heavily to weight each category of word:
+ 
+ {D-weight, A-weight, B-weight, C-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(

+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or less.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+


diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b


--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | [email protected]
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | [email protected]
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 |  
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 |  
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 |  
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 |  
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+


diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496


--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+# Template makefile for a dictionary module built on top of contrib/tsearch2.
+# NOTE(review): the CFG_* tokens below look like placeholders that the
+# gendict generator replaces with real values -- confirm against config.sh.
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+PG_CPPFLAGS =
+# Links against the tsearch2 library in the sibling contrib/tsearch2
+# directory, so tsearch2 has to be compiled before this module is.
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+
+include $(top_srcdir)/contrib/contrib-global.mk


diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7


--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility helps people create dictionaries for the contrib/tsearch v2
+module. In particular, it has built-in support for Snowball stemmers.
+
+Programming API to tsearch2 dictionaries is described in tsearch v2 
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument of the -p option must be *the same* as the name of
+      the stemming function in stem.c (without the _stem suffix).
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample portuguese words with the stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+   Note that the dictionary and the corresponding entry in the tsearch
+   configuration are now installed, and you may modify them using
+   plain SQL commands, for example to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method.
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercases the input word and removes it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method.
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please check the Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for additional information about "Gendict tutorial" and dictionaries.
\ No newline at end of file


diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542


--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+# Print the command-line help for all invocation forms, then exit with status 1.
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+# Defaults: boolean switches start as "no", string options start empty.
+stemmode=no
+verbose=no
+hasinit=no
+dictname=
+prefix=
+cfile=
+hfile=
+dir=
+comment=
+
+# Parse the command line; letters followed by ':' in the spec take an argument.
+while getopts n:c:C:h:d:p:vis opt; do
+   case "$opt" in
+       n)  dictname="$OPTARG" ;;
+       p)  prefix="$OPTARG" ;;
+       c)  cfile="$OPTARG" ;;
+       h)  hfile="$OPTARG" ;;
+       d)  dir="$OPTARG" ;;
+       C)  comment="$OPTARG" ;;
+       s)  stemmode=yes ;;
+       i)  hasinit=yes ;;
+       v)  verbose=yes ;;
+       \?) usage ;;
+   esac
+done
+
+# A dictionary name is mandatory.
+[ ${#dictname} -eq 0 ] && usage
+
+# Dictionary names are always used in lower case.
+dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'`
+
+# Snowball mode implies an init method and the fixed stem.c / stem.h pair;
+# the function prefix defaults to the dictionary name.
+if [ $stemmode = "yes" ] ; then
+   [ ${#prefix} -eq 0 ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi
+
+[ ${#dir}   -eq 0 ] && dir="dict_$dictname"
+
+# The comment is substituted into the generated SQL either as the keyword
+# null or as a quoted literal; embedded single quotes are doubled so the
+# literal stays well-formed.
+if [ ${#comment} -eq 0 ]; then
+   comment=null
+else
+   comment=`printf '%s\n' "$comment" | sed "s/'/''/g"`
+   comment="'$comment'"
+fi
+
+# Object list for the Makefile: each source with its ".c" suffix turned into
+# ".o" (only a real ".c" suffix is rewritten, not any trailing 'c').
+ofile=
+for f in $cfile
+do
+   f=` echo $f | sed 's#\.c$#.o#'`
+   ofile="$ofile $f"
+done
+
+# Plus the object built from the generated dictionary source itself.
+if [ $stemmode = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+# With -v, print the effective settings before generating anything.
+if [ "$verbose" = yes ]; then
+   echo Dictname: "'"$dictname"'"
+   echo Snowball stemmer: $stemmode
+   echo Has init method: $hasinit
+   if [ "$stemmode" = yes ]; then
+       echo Function prefix: $prefix
+   fi
+   echo Source files: $cfile
+   echo Header files: $hfile
+   echo Object files: $ofile
+   echo Comment: $comment
+   echo Directory: ../../$dir
+fi
+
+
+# Create the target directory two levels up unless it already exists.
+if [ "$verbose" = yes ]; then echo -n 'Build directory...  '; fi
+if [ ! -d ../../$dir ] && ! mkdir ../../$dir; then
+   echo "Can't create directory ../../$dir"
+   exit 1
+fi
+if [ "$verbose" = yes ]; then echo ok; fi
+
+
+# Generate the module Makefile from Makefile.IN in two passes:
+#   1. substitute the CFG_DIR, CFG_MODNAME and CFG_OFILE placeholders into a
+#      temporary file;
+#   2. rewrite the whole PG_CPPFLAGS line with the include path(s) needed
+#      (Snowball wrappers additionally get ../tsearch2/snowball).
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+else
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+fi
+# the intermediate file is no longer needed
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+# Generate dict_<name>.sql.in from sql.IN.  Template lines carry a leading
+# tag (HASINIT / NOINIT / ISSNOWBALL / NOSNOWBALL); for the selected variant
+# the tag is stripped so the line survives, for the other variant the whole
+# line is blanked.
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   # with init method: keep HASINIT lines, blank NOINIT lines
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       # Snowball wrapper: keep ISSNOWBALL lines, blank NOSNOWBALL lines
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       # template dictionary: keep NOSNOWBALL lines, blank ISSNOWBALL lines
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   # no init method (never a Snowball wrapper): keep NOINIT and NOSNOWBALL lines
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+# Copy the listed source and header files into the new module directory;
+# any cp failure aborts the script.
+if [ -n "$cfile" ] || [ -n "$hfile" ]; then
+   if [ "$verbose" = yes ]; then echo -n 'Copy source and header files...  '; fi
+   if [ -n "$cfile" ] && ! cp $cfile ../../$dir; then
+       echo "Cant cp all or one of files: $cfile"
+       exit 1
+   fi
+   if [ -n "$hfile" ] && ! cp $hfile ../../$dir; then
+       echo "Cant cp all or one of files: $hfile"
+       exit 1
+   fi
+   if [ "$verbose" = yes ]; then echo ok; fi
+fi
+
+
+# Generate subinclude.h: one #include line per header given with -h
+# (an empty file when there are none).
+[ $verbose = "yes" ] && echo -n 'Build sub-include header...  '
+# Create/truncate with ':' instead of 'echo -n': an echo that does not
+# understand -n would write a literal "-n" line into the C header.
+: > ../../$dir/subinclude.h
+for i in $hfile
+do
+   echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+# Instantiate the dictionary C source from the matching template.
+if  [ $stemmode = "yes" ] ; then
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   # Snowball wrapper: fill in the module name and the stemmer function prefix
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   [ $verbose = "yes" ] && echo -n 'Build dictionary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       # keep HASINIT-tagged lines (tag stripped), blank NOINIT-tagged lines
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else
+       # blank HASINIT-tagged lines, keep NOINIT-tagged lines (tag stripped)
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+# Write a one-line README into the generated module and report completion.
+if [ "$verbose" = yes ]; then echo -n "Build README.$dictname...  "; fi
+if [ "$stemmode" = yes ]; then
+   readme_text="Autogenerated Snowball's wrapper for $prefix"
+else
+   readme_text="Autogenerated template for $dictname"
+fi
+echo "$readme_text" > ../../$dir/README.$dictname
+if [ "$verbose" = yes ]; then echo ok; fi
+
+echo All is done
+


diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/*
+ * Per-dictionary state built by the init method: the Snowball environment,
+ * the stop-word list and the stemming function to apply.
+ */
+typedef struct {
+   struct SN_env *z;
+   StopList    stoplist;
+   int (*stem)(struct SN_env * z);
+} DictSnowball;
+
+
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * Init method of the generated Snowball dictionary.
+ *
+ * Allocates a zeroed DictSnowball with malloc, loads and sorts the stop-word
+ * list when argument 0 is a non-null text (presumably the stop-word file
+ * specification -- confirm against readstoplist), creates the Snowball
+ * environment and returns the state as a pointer Datum.  Raises
+ * elog(ERROR) when either allocation fails.
+ */
+Datum 
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+       
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       freestoplist(&(d->stoplist));
+       /* d came from malloc, so it must be released by hand before bailing out */
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+


diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+HASINIT /*
+HASINIT  * State and init method of the template dictionary.  Every line of this
+HASINIT  * section is tagged, so config.sh keeps it only when run with -i.
+HASINIT  * The init method allocates a zeroed DictExample with malloc and, when
+HASINIT  * argument 0 is a non-null text, reads and sorts the stop-word list.
+HASINIT  */
+HASINIT typedef struct {
+HASINIT    StopList    stoplist;
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT 
+HASINIT    if ( !d )
+HASINIT        elog(ERROR, "No memory");
+HASINIT    memset(d,0,sizeof(DictExample));
+HASINIT 
+HASINIT    d->stoplist.wordop=lowerstr;
+HASINIT    
+HASINIT    /* Your INIT code */
+HASINIT    
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+HASINIT        text       *in = PG_GETARG_TEXT_P(0);
+HASINIT        readstoplist(in, &(d->stoplist));
+HASINIT        sortstoplist(&(d->stoplist));
+HASINIT        PG_FREE_IF_COPY(in, 0);
+HASINIT    }
+HASINIT 
+HASINIT    PG_RETURN_POINTER(d);
+HASINIT }
+
+/*
+ * Lexize method of the template dictionary.
+ *
+ * Copies the input word (argument 1, length in argument 2) and returns a
+ * NULL-terminated palloc'd array of at most one string.  In the variant with
+ * an init method, an empty word or a word found in the stop list yields an
+ * empty array; otherwise the copy itself is returned.
+ */
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+   char    **res=palloc(sizeof(char*)*2);
+
+   /* Your LEXIZE dictionary code */
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0]=NULL;
+HASINIT    } else 
+       res[0]=txt;
+   res[1]=NULL;
+
+   PG_RETURN_POINTER(res);
+}


diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842


--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+-- Registration script template for a generated dictionary.
+-- Lines starting with a HASINIT / NOINIT / ISSNOWBALL / NOSNOWBALL tag are
+-- kept (tag stripped) or blanked by config.sh depending on the variant built.
+SET search_path = public;
+BEGIN;
+
+-- init method (only for dictionaries that have one)
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+-- own lexize method (template dictionaries only)
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+-- dictionary row: name, init function oid (or null), null, lexize function oid, comment
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;


diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/* Key (as GISTTYPE *) of the pos-th GISTENTRY stored in the varlena vector 'vec' */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/* Sum of bits 0..7 of one byte, i.e. the number of bits set in 'val' */
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/* Input function of the index key type: not supported, always raises an error. */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/* Output function of the index key type: not supported, always raises an error. */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/* qsort comparator ordering two int4 values ascending */
+static int
+compareint(const void *a, const void *b)
+{
+   int4        lhs = *((int4 *) a);
+   int4        rhs = *((int4 *) b);
+
+   if (lhs > rhs)
+       return 1;
+   if (lhs < rhs)
+       return -1;
+   return 0;
+}
+
+/*
+ * Sort the l-element array 'a' in place and squeeze out duplicates.
+ * Returns the number of distinct values, which then occupy a[0..n-1].
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+   int4       *ptr,
+              *res;
+
+   /*
+    * Nothing to do for 0 or 1 elements.  The empty case must be caught
+    * here: the compaction below always reports at least one element.
+    */
+   if (l <= 1)
+       return l;
+
+   ptr = res = a;
+
+   qsort((void *) a, l, sizeof(int4), compareint);
+
+   /* res marks the last unique value kept so far, ptr scans ahead */
+   while (ptr - a < l)
+       if (*ptr != *res)
+           *(++res) = *ptr++;
+       else
+           ptr++;
+   return res + 1 - a;
+}
+
+/* Build the bit signature of an array key: clear 'sign', then hash every element into it */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+   int4       *elems = GETARR(a);
+   int4        nelems = ARRNELEM(a);
+   int4        idx = 0;
+
+   MemSet((void *) sign, 0, sizeof(BITVEC));
+   while (idx < nelems)
+   {
+       HASH(sign, elems[idx]);
+       idx++;
+   }
+}
+
+/*
+ * GiST compress method.
+ *
+ * Leaf entry (a tsvector): builds an ARRKEY key holding the sorted,
+ * de-duplicated CRC32 of every word; when that key is larger than
+ * TOAST_INDEX_TARGET it is replaced by a SIGNKEY bit signature.
+ * Non-leaf SIGNKEY entry that is not yet ALLISTRUE: when every signature
+ * byte is 0xff the key is replaced by a header-only SIGNKEY | ALLISTRUE key.
+ * In all other cases the incoming entry is returned unchanged.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       /* array key with one slot per word of the tsvector */
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* release the detoasted copy, if one was made */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /* any byte that is not all ones: keep the entry as it is */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       /* every bit is set: store the compact header-only ALLISTRUE form */
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * GiST decompress method: detoasts the key.  When detoasting produced a new
+ * copy, a fresh GISTENTRY wrapping it is returned; otherwise the incoming
+ * entry is handed back untouched.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+   GISTENTRY  *retval;
+
+   /* key was not toasted: nothing to rebuild */
+   if (key == (GISTTYPE *) DatumGetPointer(entry->key))
+       PG_RETURN_POINTER(entry);
+
+   retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+   gistentryinit(*retval, PointerGetDatum(key),
+                 entry->rel, entry->page,
+                 entry->offset, key->len, FALSE);
+
+   PG_RETURN_POINTER(retval);
+}
+
+/* Half-open range [arrb, arre) over the sorted int4 array of an array key */
+typedef struct
+{
+   int4       *arrb;
+   int4       *arre;
+}  CHKVAL;
+
+/*
+ * Binary search: is val->val present in the sorted array described by
+ * the CHKVAL passed as 'checkval'?
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+   CHKVAL     *range = (CHKVAL *) checkval;
+   int4       *lo = range->arrb;
+   int4       *hi = range->arre;
+
+   /* the candidate, if present, always lies in [lo, hi) */
+   while (lo < hi)
+   {
+       int4       *mid = lo + (hi - lo) / 2;
+
+       if (*mid < val->val)
+           lo = mid + 1;
+       else if (*mid == val->val)
+           return (true);
+       else
+           hi = mid;
+   }
+
+   return (false);
+}
+
+/* Signature lookup: tests the bit that val->val hashes to in the signature 'checkval' */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+   return GETBIT(checkval, HASHVAL(val->val));
+}
+
+/*
+ * GiST consistent method: evaluates the query (argument 1) against the key
+ * of the entry (argument 0).  An empty query never matches; an ALLISTRUE
+ * signature always matches; otherwise TS_execute decides, probing either
+ * the bit signature or the sorted array of the key.
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(entry->key);
+   QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+   CHKVAL      chkval;
+
+   if (!query->size)
+       PG_RETURN_BOOL(false);
+
+   if (!ISSIGNKEY(key))
+   {
+       /* only leaf pages */
+       chkval.arrb = GETARR(key);
+       chkval.arre = chkval.arrb + ARRNELEM(key);
+       PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                                 (void *) &chkval, true,
+                                 checkcondition_arr));
+   }
+
+   if (ISALLTRUE(key))
+       PG_RETURN_BOOL(true);
+
+   PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                             (void *) GETSIGN(key), false,
+                             checkcondition_bit));
+}
+
+/*
+ * Merge key 'add' into the signature 'sbase'.
+ * Returns 1 when 'add' is ALLISTRUE (sbase is left untouched), else 0.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+   int4        i;
+
+   if (!ISSIGNKEY(add))
+   {
+       /* array key: hash each element into the signature */
+       int4       *ptr = GETARR(add);
+
+       for (i = 0; i < ARRNELEM(add); i++)
+           HASH(sbase, ptr[i]);
+       return 0;
+   }
+
+   if (ISALLTRUE(add))
+       return 1;
+
+   {
+       /* plain signature: OR it in byte by byte */
+       BITVECP     sadd = GETSIGN(add);
+
+       LOOPBYTE(
+                sbase[i] |= sadd[i];
+       );
+   }
+   return 0;
+}
+
+
+/*
+ * GiST union method: ORs all keys of the entry vector (argument 0) into one
+ * signature key.  As soon as an ALLISTRUE key is met the result is the
+ * header-only SIGNKEY | ALLISTRUE key.  The size of the result is stored
+ * through argument 1.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   int        *size = (int *) PG_GETARG_POINTER(1);
+   BITVEC      base;
+   int4        len = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+   int4        i;
+   int4        flag = 0;
+   GISTTYPE   *result;
+
+   MemSet((void *) base, 0, sizeof(BITVEC));
+   for (i = 0; i < len; i++)
+   {
+       /* unionkey() returns non-zero for an ALLISTRUE key: no need to go on */
+       if (unionkey(base, GETENTRY(entryvec, i)))
+       {
+           flag = ALLISTRUE;
+           break;
+       }
+   }
+
+   flag |= SIGNKEY;
+   len = CALCGTSIZE(flag, 0);
+   result = (GISTTYPE *) palloc(len);
+   *size = result->len = len;
+   result->flag = flag;
+   /* an ALLISTRUE key carries no signature bytes */
+   if (!ISALLTRUE(result))
+       memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * GiST same method: stores in *result (argument 2) whether keys a and b are
+ * equal.  Signature keys compare by ALLISTRUE flag and signature bytes,
+ * array keys by length and elements.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+   GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+   GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+   bool       *result = (bool *) PG_GETARG_POINTER(2);
+   int4        i;
+
+   if (!ISSIGNKEY(a))
+   {
+       /* a and b ISARRKEY */
+       int4        lena = ARRNELEM(a),
+                   lenb = ARRNELEM(b);
+       int4       *ptra = GETARR(a),
+                  *ptrb = GETARR(b);
+
+       *result = (lena == lenb) ? true : false;
+       for (i = 0; *result && i < lena; i++)
+           if (ptra[i] != ptrb[i])
+               *result = false;
+   }
+   else if (ISALLTRUE(a) || ISALLTRUE(b))
+   {
+       /* then b also ISSIGNKEY; equal only when both are ALLISTRUE */
+       *result = (ISALLTRUE(a) && ISALLTRUE(b)) ? true : false;
+   }
+   else
+   {
+       BITVECP     sa = GETSIGN(a),
+                   sb = GETSIGN(b);
+
+       *result = true;
+       LOOPBYTE(
+           if (sa[i] != sb[i])
+           {
+               *result = false;
+               break;
+           }
+       );
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/* Number of bits set in a signature */
+static int4
+sizebitvec(BITVECP sign)
+{
+   int4        i,
+               nbits = 0;
+
+   LOOPBYTE(
+       nbits += SUMBIT(sign[i]);
+   );
+   return nbits;
+}
+
+/* Hamming distance between two signatures: number of bit positions that differ */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+   int dist = 0;
+   int i;
+
+   LOOPBIT(
+       dist += ( GETBIT(a,i) != GETBIT(b,i) ) ? 1 : 0;
+   );
+   return dist;
+}
+
+/*
+ * Hamming distance between two signature keys.  An ALLISTRUE key counts as
+ * a signature with every bit set, so its distance to a plain signature is
+ * the number of bits that signature has clear.
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+   if ( ISALLTRUE(a) && ISALLTRUE(b) )
+       return 0;
+   if ( ISALLTRUE(a) )
+       return SIGLENBIT-sizebitvec(GETSIGN(b));
+   if ( ISALLTRUE(b) )
+       return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+   return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * GiST penalty method: stores in *penalty (argument 2) the distance between
+ * the existing key (argument 0) and the new key (argument 1).  An array key
+ * is first turned into a signature.
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+   GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+   float      *penalty = (float *) PG_GETARG_POINTER(2);
+   GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+   GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+   BITVEC      newsign;
+
+   /* both keys are signatures already */
+   if (!ISARRKEY(newval))
+   {
+       *penalty = hemdist(origval, newval);
+       PG_RETURN_POINTER(penalty);
+   }
+
+   makesign(newsign, newval);
+   if (ISALLTRUE(origval))
+       *penalty = ((float) (SIGLENBIT - sizebitvec(newsign))) / (float) (SIGLENBIT + 1);
+   else
+       *penalty = hemdistsign(newsign, GETSIGN(origval));
+
+   PG_RETURN_POINTER(penalty);
+}
+
+/* Signature form of one key, cached for picksplit */
+typedef struct
+{
+   bool        allistrue;
+   BITVEC      sign;
+}  CACHESIGN;
+
+/*
+ * Fill a cache slot from a key: array keys are hashed into a signature,
+ * ALLISTRUE keys only set the flag (sign is left as is), plain signatures
+ * are copied.
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+   if (ISARRKEY(key))
+   {
+       item->allistrue = false;
+       makesign(item->sign, key);
+   }
+   else if (ISALLTRUE(key))
+   {
+       item->allistrue = true;
+   }
+   else
+   {
+       item->allistrue = false;
+       memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+   }
+}
+
+/* Balancing term used by picksplit: -(a-b)^3 * c, as a double */
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+/* Entry position paired with its split cost */
+typedef struct
+{
+   OffsetNumber pos;
+   int4        cost;
+} SPLITCOST;
+
+/* qsort comparator ordering SPLITCOST items by ascending cost */
+static int
+comparecost(const void *a, const void *b)
+{
+   int4        lcost = ((SPLITCOST *) a)->cost;
+   int4        rcost = ((SPLITCOST *) b)->cost;
+
+   if (lcost > rcost)
+       return 1;
+   if (lcost < rcost)
+       return -1;
+   return 0;
+}
+
+
+/* Same as hemdist(), but for two cached signatures */
+static int
+hemdistcache(CACHESIGN   *a, CACHESIGN   *b) {
+   if ( a->allistrue && b->allistrue )
+       return 0;
+   if ( a->allistrue )
+       return SIGLENBIT-sizebitvec(b->sign);
+   if ( b->allistrue )
+       return SIGLENBIT-sizebitvec(a->sign);
+
+   return hemdistsign( a->sign, b->sign );
+}
+
+/*
+ * GiST picksplit method.
+ *
+ * 1. Caches a signature for every entry and picks as seeds the pair of
+ *    entries with the largest Hamming distance.
+ * 2. Initialises the left/right union keys from the two seeds.
+ * 3. Sorts all entries by |distance to seed 1 - distance to seed 2|
+ *    ascending and assigns each one to the side whose current union is
+ *    closer, with WISH_F adding a term that depends on how unbalanced the
+ *    two sides currently are.
+ *
+ * Fills and returns the GIST_SPLITVEC passed as argument 1.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+   OffsetNumber k,
+               j;
+   GISTTYPE   *datum_l,
+              *datum_r;
+   BITVECP     union_l,
+               union_r;
+   int4        size_alpha,
+               size_beta;
+   int4        size_waste,
+               waste = -1;
+   int4        nbytes;
+   OffsetNumber seed_1 = 0,
+               seed_2 = 0;
+   OffsetNumber *left,
+              *right;
+   OffsetNumber maxoff;
+   BITVECP     ptr;
+   int         i;
+   CACHESIGN  *cache;
+   SPLITCOST  *costvector;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+   cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+   fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+   /* seed selection: the pair with the greatest distance wins */
+   for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+       for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+           /* the cache is filled lazily during the first outer pass */
+           if (k == FirstOffsetNumber)
+               fillcache(&cache[j], GETENTRY(entryvec, j));
+
+           size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+           if (size_waste > waste) {
+               waste = size_waste;
+               seed_1 = k;
+               seed_2 = j;
+           }
+       }
+   }
+
+   left = v->spl_left;
+   v->spl_nleft = 0;
+   right = v->spl_right;
+   v->spl_nright = 0;
+
+   /* no pair was examined: fall back to the first two entries */
+   if (seed_1 == 0 || seed_2 == 0) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
+   /* form initial .. */
+   if (cache[seed_1].allistrue) {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_l->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_l->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+   }
+   if (cache[seed_2].allistrue) {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_r->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_r->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+   }
+
+   /* union_l / union_r are only dereferenced when the datum is not ALLISTRUE */
+   union_l=GETSIGN(datum_l);
+   union_r=GETSIGN(datum_r);
+   maxoff = OffsetNumberNext(maxoff);
+   fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+   /* sort before ... */
+   costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+   for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+       costvector[j - 1].pos = j;
+       size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+       size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+       costvector[j - 1].cost = abs(size_alpha - size_beta);
+   }
+   qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+   for (k = 0; k < maxoff; k++) {
+       j = costvector[k].pos;
+       /* the seeds go to their own side unconditionally */
+       if (j == seed_1) {
+           *left++ = j;
+           v->spl_nleft++;
+           continue;
+       } else if (j == seed_2) {
+           *right++ = j;
+           v->spl_nright++;
+           continue;
+       }
+
+       /*
+        * Distance of entry j to the current left union.  cache[j].sign is
+        * already a bare signature, so it must be used directly: GETSIGN()
+        * would skip GTHDRSIZE bytes into it and read past its end.
+        */
+       if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+               size_alpha=0;
+           else
+               size_alpha = SIGLENBIT-sizebitvec(  
+                   ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign
+               );
+       } else {
+           size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+       }
+
+       /* same for the right union */
+       if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+               size_beta=0;
+           else
+               size_beta = SIGLENBIT-sizebitvec(  
+                   ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign
+               );
+       } else {
+           size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+       }
+
+       if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+           /* goes left: fold the entry into the left union */
+           if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_l) )
+                   MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_l[i] |= ptr[i];
+               );
+           }
+           *left++ = j;
+           v->spl_nleft++;
+       } else {
+           /* goes right: fold the entry into the right union */
+           if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_r) )
+                   MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_r[i] |= ptr[i];
+               );
+           }
+           *right++ = j;
+           v->spl_nright++;
+       }
+   }
+
+   /* write FirstOffsetNumber into the slot after the last entry of each list */
+   *right = *left = FirstOffsetNumber;
+   pfree(costvector);
+   pfree(cache);
+   v->spl_ldatum = PointerGetDatum(datum_l);
+   v->spl_rdatum = PointerGetDatum(datum_r);
+
+   PG_RETURN_POINTER(v);
+}


diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+/* bits per byte */
+#define BITBYTE 8
+/* signature length in int4 units */
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+/* signature length in bytes and in bits */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+/* a signature (fixed-size byte array) and a pointer to its bytes */
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+/*
+ * Run statement(s) 'a' once per signature byte (LOOPBYTE) or per signature
+ * bit (LOOPBIT).  The caller must have an integer variable 'i' in scope; it
+ * is the loop index and may be used inside 'a'.
+ */
+#define LOOPBYTE(a) \
+       for(i=0;i<SIGLEN;i++) {\
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i<SIGLENBIT;i++) {\
+                               a;\
+               }
+
+/* byte of signature x that holds bit number i */
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+/* bit i (0..7) of the single byte x */
+#define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
+/* clear / set / read bit number i of signature x */
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+/* bit position a value maps to, and the macro that sets that bit in a signature */
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ *
+ * len is the total size of the key in bytes (header included), flag is a
+ * combination of ARRKEY / SIGNKEY / ALLISTRUE, and data holds either an
+ * int4 array (ARRKEY), a signature (SIGNKEY) or nothing
+ * (SIGNKEY | ALLISTRUE) -- see CALCGTSIZE.
+ */
+typedef struct
+{
+   int4        len;
+   int4        flag;
+   char        data[1];
+}  GISTTYPE;
+
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+/* flag tests; each yields the raw masked flag value, not a normalised 0/1 */
+#define ISARRKEY(x) ( ((GISTTYPE*)x)->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)x)->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)x)->flag & ALLISTRUE )
+
+/* size of the len + flag header, and total key size for a flag / element count */
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+/* payload accessors: x must point to a GISTTYPE, the header is skipped */
+#define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)x+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)x)->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "spell.h"
+
+/* NormalizeWord() returns NULL for words longer than this many bytes */
+#define MAXNORMLEN 56
+
+/* case-insensitive "does x start with y" test; evaluates to 0 on a match */
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/* qsort() comparator: orders SPELL entries by their word, in strcmp() order */
+static int
+cmpspell(const void *s1, const void *s2)
+{
+   const SPELL *left = (const SPELL *) s1;
+   const SPELL *right = (const SPELL *) s2;
+
+   return strcmp(left->word, right->word);
+}
+
+/* Lower-case a NUL-terminated string in place, one byte at a time */
+static void
+strlower(char *str)
+{
+   unsigned char *cursor;
+
+   for (cursor = (unsigned char *) str; *cursor; cursor++)
+       *cursor = tolower(*cursor);
+}
+
+/*
+ * Backward string compare for suffix tree operations: the strings are
+ * compared starting at their last characters.  Returns -1, 0 or 1; if one
+ * string is a tail of the other, the shorter one compares as smaller.
+ */
+static int
+strbcmp(const char *s1, const char *s2)
+{
+   int         i1 = strlen(s1) - 1;
+   int         i2 = strlen(s2) - 1;
+
+   for (; i1 >= 0 && i2 >= 0; i1--, i2--)
+   {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   if (i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+/*
+ * Backward compare limited to the last 'count' characters.  Returns 0 as
+ * soon as 'count' characters have matched; otherwise behaves like a
+ * backward strcmp, with the string that runs out first comparing smaller.
+ */
+static int
+strbncmp(const char *s1, const char *s2, size_t count)
+{
+   int         i1 = strlen(s1) - 1;
+   int         i2 = strlen(s2) - 1;
+   int         left = count;
+
+   for (; i1 >= 0 && i2 >= 0 && left > 0; i1--, i2--, left--)
+   {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   if (left == 0 || i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+
+/*
+ * qsort() comparator for AFFIX entries: first by type, then by the 'repl'
+ * string -- forward compare for prefixes ('p'), backward compare otherwise.
+ */
+static int
+cmpaffix(const void *s1, const void *s2)
+{
+   const AFFIX *a = (const AFFIX *) s1;
+   const AFFIX *b = (const AFFIX *) s2;
+
+   if (a->type != b->type)
+       return (a->type < b->type) ? -1 : 1;
+
+   if (a->type == 'p')
+       return strcmp(a->repl, b->repl);
+   return strbcmp(a->repl, b->repl);
+}
+
+/*
+ * Append one word with its affix flags to the dictionary.
+ *
+ * The Spell array grows in steps of 20K entries.  The word is copied with
+ * strdup(); the flag string is copied into the fixed-size SPELL.flag buffer
+ * and is truncated if it does not fit.  Always returns 0; running out of
+ * memory is reported through elog(ERROR).
+ */
+int 
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   SPELL *entry;
+
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell"); 
+   }
+   entry=&Conf->Spell[Conf->nspell];
+   entry->word=strdup(word);
+   if ( !entry->word ) 
+       elog(ERROR,"No memory for AddSpell");
+   /*
+    * strncpy() does not terminate the destination when the source fills
+    * it, and FindWord() runs strchr() over this buffer, so always leave
+    * room for (and write) the terminating NUL.
+    */
+   strncpy(entry->flag,flag,sizeof(entry->flag)-1);
+   entry->flag[sizeof(entry->flag)-1]='\0';
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * Load a word list file into the dictionary.
+ *
+ * Each line is "word" or "word/FLAGS".  The flag part is cut at the first
+ * character that is not an ASCII letter, the whole line is lower-cased,
+ * line terminators are removed, and the result is handed to AddSpell().
+ * Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportDictionary(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];  
+   FILE *dict;
+
+   if(!(dict=fopen(filename,"r")))return(1);
+   while(fgets(str,sizeof(str),dict)){
+       unsigned char *s;
+       const unsigned char *flag;
+
+           flag = NULL;
+       if((s=strchr(str,'/'))){
+           /* split the line at '/': word before it, flags after it */
+           *s=0;
+           s++;flag=s;
+           /* keep only the leading run of ASCII letters as flags */
+           while(*s){
+               if (((*s>='A')&&(*s<='Z'))||((*s>='a')&&(*s<='z')))
+                   s++;
+               else {
+                   *s=0;
+                   break;
+               }
+           }
+       }else{
+           flag="";
+       }
+       /* note: str is already cut at '/', so only the word is lower-cased */
+       strlower(str);
+       /* overwrite CR / LF with NUL so the word ends at the line terminator */
+       s=str;
+       while(*s){
+           if(*s=='\r')*s=0;
+           if(*s=='\n')*s=0;
+           s++;
+       }
+       AddSpell(Conf,str,flag);
+   }
+   fclose(dict);
+   return(0);
+}
+
+
+/*
+ * Look up 'word' in the sorted Spell array.
+ *
+ * The search is limited to the index range recorded in SpellTree for the
+ * word's first byte.  If affixflag is non-zero, an entry only matches when
+ * its flag string contains that character.  Returns the matching entry or
+ * NULL.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int l,c,r,resc,resl,resr, i;
+
+   i = (int)(*word) & 255;
+   l = Conf->SpellTree.Left[i];
+   r = Conf->SpellTree.Right[i];
+   /* no dictionary word starts with this byte */
+   if (l == -1) return (NULL);
+   while(l<=r){
+       /* each round probes the middle and both ends of the current range */
+       c = (l + r) >> 1;
+       resc = strcmp(Conf->Spell[c].word, word);
+       if( (resc == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[c]);
+       }
+       resl = strcmp(Conf->Spell[l].word, word);
+       if( (resl == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[l]);
+       }
+       resr = strcmp(Conf->Spell[r].word, word);
+       if( (resr == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[r]);
+       }
+       /*
+        * Halve the range on the side indicated by the middle probe and
+        * also drop the end that was just checked.  When the middle word
+        * is equal but lacks the flag, shrink by one from both ends.
+        */
+       if(resc < 0){
+           l = c + 1;
+           r--;
+       } else if(resc > 0){
+           r = c - 1;
+           l++;
+       } else {
+           l++;
+           r--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * Append one affix rule to the dictionary.
+ *
+ *  flag  - affix flag character the rule belongs to
+ *  mask  - regular expression the base form must match; it is anchored
+ *          with '$' for suffixes (type 's') and with '^' otherwise
+ *  find  - text put in place of 'repl' when rebuilding the base form
+ *  repl  - affix text as it appears in the inflected word
+ *  type  - 's' for a suffix rule, anything else is handled as a prefix
+ *
+ * The Affix array grows 16 entries at a time.  mask/find/repl live in
+ * fixed-size AFFIX buffers, so over-long input is rejected with
+ * elog(ERROR) instead of overflowing them.  Always returns 0.
+ */
+int 
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   AFFIX *affix;
+
+   if(Conf->naffixes>=Conf->maffixes){
+       if(Conf->maffixes){
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }else{
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL ) 
+           elog(ERROR,"No memory for AddAffix");
+   }
+   affix = &Conf->Affix[Conf->naffixes];
+
+   /* the mask gets one extra anchor character in front or at the end */
+   if (strlen(mask) + 1 >= sizeof(affix->mask) ||
+       strlen(find) >= sizeof(affix->find) ||
+       strlen(repl) >= sizeof(affix->repl))
+       elog(ERROR,"Affix rule is too long");
+
+   if (type=='s') {
+       sprintf(affix->mask,"%s$",mask);
+   } else {
+       sprintf(affix->mask,"^%s",mask);
+   }
+   /* compile == 1 means the regex still has to be compiled on first use */
+   affix->compile = 1;
+   affix->flag=flag;
+   affix->type=type;
+   
+   strcpy(affix->find,find);
+   strcpy(affix->repl,repl);
+   affix->replen=strlen(repl);
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy 'src' into 'dist' leaving out every space, tab and '-' character.
+ * 'dist' must be able to hold the result; it is returned for convenience.
+ */
+static char * 
+remove_spaces(char *dist,char *src)
+{
+   char *out = dist;
+   char *in;
+
+   for (in = src; *in; in++)
+   {
+       if (*in == ' ' || *in == '-' || *in == '\t')
+           continue;
+       *out++ = *in;
+   }
+   *out = 0;
+   return dist;
+}
+
+
+/*
+ * Load an affix file into the dictionary.
+ *
+ * Lines starting with "suffixes" / "prefixes" switch the current section,
+ * lines starting with "flag " select the flag character for the rules
+ * that follow.  Every other line inside a section is parsed as
+ * "mask > find , repl" (text after '#' is ignored), stripped of spaces,
+ * tabs and '-', and passed to AddAffix().
+ * Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           /*
+            * Skip '*' and spaces before the flag character.  strchr()
+            * also matches the terminating NUL, so test for end of string
+            * first or a line ending right here would be overrun.
+            */
+           while(*s && strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* str is no longer needed and serves as scratch space below */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               /* "mask > text" without a comma: the text is the replacement */
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+       
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+       
+   }
+   fclose(affix);
+       
+   return(0);
+}
+
+/*
+ * Sort the word list and rebuild SpellTree: for every first byte that
+ * occurs, Left/Right hold the first and last index of the words starting
+ * with it.  Left is -1 for bytes that start no word.
+ */
+void 
+SortDictionary(IspellDict * Conf)
+{
+   int         prev = -1;
+   int         first;
+   size_t      idx;
+
+   qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+   for (idx = 0; idx < 256; idx++)
+       Conf->SpellTree.Left[idx] = -1;
+
+   for (idx = 0; idx < Conf->nspell; idx++)
+   {
+       first = (int)(*(Conf->Spell[idx].word)) & 255;
+       if (first != prev)
+       {
+           Conf->SpellTree.Left[first] = idx;
+           prev = first;
+       }
+       Conf->SpellTree.Right[first] = idx;
+   }
+}
+
+/*
+ * Sort the affix rules (see cmpaffix) and rebuild the prefix and suffix
+ * index trees.  Prefix rules are indexed by the first byte of 'repl',
+ * suffix rules by its last byte (0 when 'repl' is empty).  Left/Right hold
+ * the first and last rule index for a byte, or -1 when there is none.
+ */
+void 
+SortAffixes(IspellDict * Conf) {
+  int   CurLetP = -1, CurLetS = -1, Let;
+  AFFIX *Affix; size_t i;
+  
+  if (Conf->naffixes > 1)
+    qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
+  for(i = 0; i < 256; i++) {
+      Conf->PrefixTree.Left[i] = Conf->PrefixTree.Right[i] = -1;
+      Conf->SuffixTree.Left[i] = Conf->SuffixTree.Right[i] = -1;
+  }
+
+  for(i = 0; i < Conf->naffixes; i++) {
+    Affix = &(((AFFIX*)Conf->Affix)[i]);
+    if(Affix->type == 'p') {
+      Let = (int)(*(Affix->repl)) & 255;
+      /* first rule for this byte: remember where its run starts */
+      if (CurLetP != Let) {
+   Conf->PrefixTree.Left[Let] = i;
+   CurLetP = Let;
+      }
+      Conf->PrefixTree.Right[Let] = i;
+    } else {
+      Let = (Affix->replen) ? (int)(Affix->repl[Affix->replen-1]) & 255 : 0;
+      if (CurLetS != Let) {
+   Conf->SuffixTree.Left[Let] = i;
+   CurLetS = Let;
+      }
+      Conf->SuffixTree.Right[Let] = i;
+    }
+  }
+}
+
+/*
+ * Try to undo one suffix rule on 'word' (of length 'len').
+ *
+ * If the word ends with Affix->repl, that tail is replaced by Affix->find;
+ * the candidate is accepted when it matches the rule's mask and is a
+ * dictionary word carrying the rule's flag.  Returns a palloc'ed copy of
+ * the candidate, or NULL.  *res receives the result of the tail compare.
+ */
+static char * 
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) {
+  regmatch_t subs[2]; /* workaround for apache&linux */
+  char newword[2*MAXNORMLEN] = "";
+  int err;
+  
+  *res = strbncmp(word, Affix->repl, Affix->replen);
+  if (*res < 0) {
+    return NULL;
+  }
+  if (*res > 0) {
+    return NULL;
+  }
+  /* build the candidate: word with its tail swapped from repl to find */
+  strcpy(newword, word);
+  strcpy(newword+len-Affix->replen, Affix->find);
+
+  /* the mask regex is compiled lazily, on first use of the rule */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return(NULL);
+    }
+    Affix->compile = 0;
+  }
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    if(FindWord(Conf, newword, Affix->flag))
+   return pstrdup(newword);    
+  }
+  return NULL;
+}
+
+#define NS 1
+/* capacity (in pointers) of the forms array built by NormalizeWord() */
+#define MAX_NORM 512
+/*
+ * Try to undo one prefix rule on 'word'.
+ *
+ * If the word starts with Affix->repl, that head is replaced by
+ * Affix->find.  When the candidate matches the rule's mask it is appended
+ * to the forms list if it is a flagged dictionary word, and additionally
+ * the first suffix rule indexed under byte 'pi' is tried on it.
+ * 'forms' is the start of the result array, '*cur' its current end.
+ * Returns the strncmp() result of the head compare (0 on match); the
+ * caller uses it to steer its search.
+ */
+static int 
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur ) {
+  regmatch_t subs[NS*2];
+  char newword[2*MAXNORMLEN] = "";
+  int err, ls, res, lres;
+  size_t newlen;
+  AFFIX *CAffix = Conf->Affix;
+  
+  res = strncmp(word, Affix->repl, Affix->replen);
+  if (res != 0) {
+    return res;
+  }
+  /* build the candidate: find + word without its repl head */
+  strcpy(newword, Affix->find);
+  strcat(newword, word+Affix->replen);
+
+  /* the mask regex is compiled lazily, on first use of the rule */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return (0);
+    }
+    Affix->compile = 0;
+  }
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    SPELL * curspell;
+
+    if((curspell=FindWord(Conf, newword, Affix->flag))){
+      /* keep one slot free for the terminating NULL */
+      if ((*cur - forms) < (MAX_NORM-1)) {
+   **cur =  pstrdup(newword);
+   (*cur)++; **cur = NULL;
+      }
+    } 
+    newlen = strlen(newword);
+    ls = Conf->SuffixTree.Left[pi];
+      if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
+   **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
+   if (**cur) {
+     (*cur)++; **cur = NULL;
+   }
+      }
+  }
+  return 0;
+}
+
+
+/*
+ * Find the dictionary base forms of 'word'.
+ *
+ * The word is lower-cased in place.  The result is a palloc'ed,
+ * NULL-terminated array of palloc'ed strings holding the word itself (if
+ * it is in the dictionary) and every form obtained by undoing prefix
+ * and/or suffix rules.  Returns NULL when nothing was found, when the word
+ * is empty, or when it is longer than MAXNORMLEN.
+ */
+char ** 
+NormalizeWord(IspellDict * Conf,char *word){
+/*regmatch_t subs[NS];*/
+size_t len;
+char ** forms;
+char **cur;
+AFFIX * Affix;
+int ri, pi, ipi, lp, rp, cp, ls, rs;
+int lres, rres, cres = 0;
+  SPELL *spell;
+
+   len=strlen(word);
+   /*
+    * An empty word has no last character: reading word[len-1] would be
+    * out of bounds, and a zero 'pi' would make the loop below never end.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return(NULL);
+
+   strlower(word);
+
+   forms=(char **) palloc(MAX_NORM*sizeof(char *));
+   cur=forms;*cur=NULL;
+
+   /* first and last byte of the word select the affix index ranges */
+   ri = (int)(*word) & 255;
+   pi = (int)(word[len-1]) & 255;
+   Affix=(AFFIX*)Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if((spell = FindWord(Conf, word, 0))){
+       *cur=pstrdup(word);
+       cur++;*cur=NULL;
+   }
+
+   /* Find all other NORMAL forms of the 'word' */
+
+   /* two passes: ipi == 0 (suffix rules with empty repl) and ipi == pi */
+   for (ipi = 0; ipi <= pi; ipi += pi) {
+
+       /* check prefix */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp) {
+         cp = (lp + rp) >> 1;
+         cres = 0;
+         if ((cur - forms) < (MAX_NORM-1)) {
+       cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+         }
+         if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+       lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+         }
+         if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+       rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+         }
+         if (cres < 0) {
+       rp = cp - 1;
+       lp++;
+         } else if (cres > 0) {
+       lp = cp + 1;
+       rp--;
+         } else {
+       lp++;
+       rp--;
+         }
+       }
+
+       /* check suffix */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       while (ls >= 0 && ls <= rs) {
+         if (  ((cur - forms) < (MAX_NORM-1)) ) {
+       *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+       if (*cur) {
+         cur++; *cur = NULL;
+       }
+         }
+         if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+       *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+       if (*cur) {
+         cur++; *cur = NULL;
+       }
+         }
+         ls++;
+         rs--;
+       } /* end while */
+     
+   } /* for ipi */
+
+   if(cur==forms){
+       pfree(forms);
+       return(NULL);
+   }
+   return(forms);
+}
+
+/*
+ * Release everything owned by the dictionary -- compiled regexes, the
+ * strdup'ed words, the Affix and Spell arrays -- and zero the structure.
+ */
+void 
+FreeIspell (IspellDict *Conf) {
+  int i;
+  AFFIX *Affix = (AFFIX *)Conf->Affix;
+
+  /* compile == 0 marks rules whose regex has actually been compiled */
+  for (i = 0; i < Conf->naffixes; i++) {
+    if (Affix[i].compile == 0) {
+      regfree(&(Affix[i].reg));
+    }
+  }
+  /* one word per Spell entry: the bound is nspell, not naffixes */
+  for (i = 0; i < Conf->nspell; i++) {
+   free( Conf->Spell[i].word );
+  }
+  free(Conf->Affix);
+  free(Conf->Spell);
+  memset( (void*)Conf, 0, sizeof(IspellDict) );
+  return;
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include 
+#include 
+
+/* one dictionary word */
+typedef struct spell_struct {
+        char * word;        /* strdup'ed by AddSpell() */
+        char flag[10];      /* affix flag characters allowed for the word */
+} SPELL;
+
+/* one affix rule, filled in by AddAffix() */
+typedef struct aff_struct {   
+        char flag;          /* affix flag character the rule belongs to */
+        char type;          /* 's' for suffix rules, 'p' for prefix rules */
+        char mask[33];      /* regex source, anchored with '^' or '$' */
+        char find[16];      /* text restored when the affix is undone */
+        char repl[16];      /* affix text as found in the inflected word */
+        regex_t reg;        /* compiled form of mask, valid once compile == 0 */
+        size_t replen;      /* strlen(repl) */
+        char compile;       /* 1 while reg still has to be compiled */
+} AFFIX;
+
+/* per-byte index: first (Left) and last (Right) array position, -1 if none */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+/* a loaded ispell dictionary: affix rules, word list and their indexes */
+typedef struct {
+   /* allocated (maffixes) and used (naffixes) entries of Affix */
+   int maffixes;
+   int naffixes;
+   AFFIX * Affix;
+
+   /* used (nspell) and allocated (mspell) entries of Spell */
+   int nspell;
+   int mspell;
+   SPELL   *Spell;
+   /* index ranges built by SortDictionary() and SortAffixes() */
+   Tree_struct SpellTree;
+   Tree_struct PrefixTree;
+   Tree_struct SuffixTree;
+
+} IspellDict;
+
+/* lookup: returns a NULL-terminated array of base forms, or NULL */
+char ** NormalizeWord(IspellDict * Conf,char *word);
+/* loaders: return 1 if the file cannot be opened, 0 otherwise */
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+/* building blocks used by the loaders; the Sort* calls build the indexes */
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+/* states of the key=value parser in parse_cfgdict() */
+#define CS_WAITKEY 0
+#define CS_INKEY   1
+#define CS_WAITEQ  2
+#define CS_WAITVALUE   3
+#define CS_INVALUE 4           /* inside a double-quoted value */
+#define CS_IN2VALUE    5       /* inside an unquoted value */
+#define CS_WAITDELIM   6
+#define CS_INESC   7           /* after a backslash; returns to CS_INVALUE */
+#define CS_IN2ESC  8           /* after a backslash; returns to CS_IN2VALUE */
+
+/*
+ * Return a palloc'ed, NUL-terminated copy of the first 'len' bytes of
+ * 'ptr' with backslash escapes removed: each backslash is dropped and the
+ * character after it is kept literally.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+   char *res=palloc(len+1), *cptr;
+   memcpy(res,ptr,len);
+   res[len]='\0';
+   /* unescape in place: ptr reads, cptr writes, both inside res */
+   cptr = ptr = res;
+   while(*ptr) {
+       if ( *ptr == '\\' ) {
+           ptr++;
+           /*
+            * A backslash as the very last character escapes nothing;
+            * stop here instead of stepping over the terminator.
+            */
+           if ( *ptr == '\0' )
+               break;
+       }
+       *cptr=*ptr; ptr++; cptr++;
+   }
+   *cptr='\0';
+
+   return res;
+}
+
+/*
+ * Parse a dictionary option string of the form
+ *     key = value , key = "quoted value" , ...
+ * into a palloc'ed array of Map entries stored in *m.  The array is
+ * zero-filled, so the entry after the last parsed pair has NULL members.
+ * Keys consist of alphabetic characters; values are either double-quoted
+ * or run up to the next space or comma; a backslash escapes the following
+ * character.  Syntax errors are reported with elog(ERROR).
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+   Map *mptr;
+   char *ptr=VARDATA(in), *begin=NULL;
+   int num=0;      /* an int: a char counter overflows on long inputs */
+   int state=CS_WAITKEY;
+
+   /* every pair but the last ends with a comma: commas bound the pair count */
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if ( *ptr==',' ) num++;
+       ptr++;
+   }
+
+   *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+   memset(mptr, 0, sizeof(Map)*(num+2) );
+   ptr=VARDATA(in);
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       /* ctype functions need an unsigned char value */
+       unsigned char ch = (unsigned char) *ptr;
+
+       if (state==CS_WAITKEY) {
+           if (isalpha(ch)) {
+               begin=ptr;
+               state=CS_INKEY;
+           } else if ( !isspace(ch) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if (state==CS_INKEY) {
+           if ( isspace(ch) ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITEQ;
+           } else if ( *ptr=='=' ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITVALUE;
+           } else if ( !isalpha(ch) ) 
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITEQ ) {
+           if ( *ptr=='=' )
+               state=CS_WAITVALUE;
+           else if ( !isspace(ch) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITVALUE ) {
+           if ( *ptr=='"' ) {
+               begin=ptr+1;
+               state=CS_INVALUE;
+           } else if ( !isspace(ch) ) {
+               begin=ptr;
+               state=CS_IN2VALUE;
+           }
+       } else if ( state==CS_INVALUE ) {
+           if ( *ptr=='"' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_INESC;
+       } else if ( state==CS_IN2VALUE ) {
+           if ( isspace(ch) || *ptr==',' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               /* stay in the unquoted flavour: come back to CS_IN2VALUE */
+               state=CS_IN2ESC;
+       } else if ( state==CS_WAITDELIM ) {
+           if ( *ptr==',' ) 
+               state=CS_WAITKEY; 
+           else if ( !isspace(ch) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state == CS_INESC ) {
+           /* the escaped character is taken literally */
+           state=CS_INVALUE;
+       } else if ( state == CS_IN2ESC ) {
+           state=CS_IN2VALUE;
+       } else 
+           elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int) (ptr-VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may simply run to the end of the input */
+   if (state==CS_IN2VALUE) {
+       mptr->value = nstrdup(begin, ptr-begin);
+       mptr++;
+   } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) ) 
+       elog(ERROR,"Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery is parsed with the text parser
+ * and morphology is applied as well.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored as a plain array of nodes: the right child
+ * is always the next item and the left child is at item+item->left
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+/* version-1 call convention declarations for the functions exported here */
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+/* token codes returned by gettoken_query(); also used as item types */
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR   2
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ */
+typedef struct NODE
+{
+   int2        weight;     /* weight bit mask, as produced by get_weight() */
+   int2        type;       /* item type: VAL, OPR, VALTRUE, ... */
+   int4        val;        /* operator character, or crc32 of the operand */
+   int2        distance;   /* offset of the operand text in the operand buffer */
+   int2        length;     /* length of the operand text */
+   struct NODE *next;      /* node pushed before this one */
+}  NODE;
+
+/* working state of the query parser */
+typedef struct
+{
+   char       *buf;        /* current position in the query text */
+   int4        state;      /* WAITOPERAND or WAITOPERATOR */
+   int4        count;      /* number of currently open parentheses */
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   int4        lenop;      /* allocated size of op */
+   int4        sumlen;     /* total length of stored operands, with their NULs */
+   char       *op;         /* buffer holding all operand strings */
+   char       *curop;      /* where the next operand will be written */
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional weight suffix ":abcd" at 'buf'.
+ *
+ * *weight becomes a bit mask (a = bit 3, b = bit 2, c = bit 1, d = bit 0,
+ * letters are case-insensitive) or 0 when there is no ':' at 'buf'.
+ * Returns the position of the first character that is not part of the
+ * suffix.
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+   *weight = 0;
+
+   if ( *buf != ':' )
+       return buf;
+
+   buf++;
+   while( *buf ) {
+       /* tolower() must not be given a negative char value */
+       switch(tolower((unsigned char) *buf)) {
+           case 'a': *weight |= 1<<3; break; 
+           case 'b': *weight |= 1<<2; break; 
+           case 'c': *weight |= 1<<1; break; 
+           case 'd': *weight |= 1;    break;
+           default: return buf; 
+       }
+       buf++;
+   }
+   
+   return buf;
+}
+
+/*
+ * get token from query string
+ */
+/*
+ * Return the type of the next token (VAL, OPR, OPEN, CLOSE, END or ERR).
+ * For OPR, *val is the operator character; for VAL, *strval / *lenval
+ * describe the operand text and *weight its weight mask.  Spaces between
+ * tokens are skipped.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               if (*(state->buf) == '!')
+               {
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* let the tsvector value parser read the operand */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       /* continue after the operand and its optional weight */
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   /* more ')' than '(' is an error */
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* end of input with parentheses still open is an error */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * push new one in polish notation reverse view
+ */
+/*
+ * Put a new node at the head of the parser's node list.  'distance' and
+ * 'lenval' are range-checked (elog(ERROR) on overflow) before they are
+ * stored in the node's 2-byte fields.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+
+   node->weight = weight;
+   node->type = type;
+   node->val = val;
+   node->distance = distance;
+   node->length = lenval;
+
+   /* link in front of the nodes pushed so far */
+   node->next = state->str;
+   state->str = node;
+   state->num++;
+}
+
+/*
+ * This function is used for tsquery parsing
+ */
+/*
+ * Push an operand exactly as given: a node holding the crc32 of the text
+ * goes on the node list, and the text itself (plus a NUL) is appended to
+ * the operand buffer, which is doubled in size as needed.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   /* the node records where in the operand buffer the text will live */
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             state->curop - state->op, lenval, weight);
+
+   while (state->curop - state->op + lenval + 1 >= state->lenop)
+   {
+       /* keep curop as an offset: repalloc may move the buffer */
+       int4        tmp = state->curop - state->op;
+
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+       state->curop = state->op + tmp;
+   }
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop += lenval;
+   *(state->curop) = '\0';
+   state->curop++;
+   state->sumlen += lenval + 1;
+   return;
+}
+
+/*
+ * This function is used for morph parsing
+ */
+/*
+ * Push an operand after running it through the text parser of the
+ * configuration selected by state->cfg_id.  Every resulting word is pushed
+ * as a value, and the words are joined with '&' operators.  If parsing
+ * yields no words at all, a single VALTRUE item is pushed instead.
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT         prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       /* from the second word on, AND it with what was pushed before */
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }   
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 ) 
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+#define STACKDEPTH 32
+/*
+ * make polish notaion of query
+ */
+/*
+ * Read tokens until END or a closing parenthesis and push them onto the
+ * node list in polish notation.  'pushval' is called for every operand.
+ * Pending operators are kept on a small local stack; a parenthesized
+ * group is handled by a recursive call.  Returns END on success.
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* emit the '&' and '!' operators waiting for this operand */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* parse the group up to its matching ')' */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* end of a group: flush everything still on the stack */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush everything still on the stack */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/* what checkcondition_str() needs to test a query item against a tsvector */
+typedef struct
+{
+   WordEntry  *arrb;       /* first word entry of the tsvector */
+   WordEntry  *arre;       /* one past its last word entry */
+   char       *values;     /* string area of the tsvector */
+   char       *operand;    /* operand strings of the query */
+}  CHKVAL;
+
+/*
+ * compare 2 string values
+ */
+/*
+ * Compare a tsvector word with a query operand: shorter sorts first,
+ * strings of equal length are compared with strncmp().
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(chkval->values + ptr->pos,
+                  chkval->operand + item->distance,
+                  item->length);
+}
+
+/*
+ * check weight info
+ */
+/*
+ * check weight info: true if at least one position of the word 'val'
+ * carries a weight that is set in the query item's weight mask.  The
+ * position list (a uint16 count followed by the entries) is read from the
+ * string area right after the short-aligned word text.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false; 
+}
+
+/*
+ * is there value 'val' in array or not ?
+ */
+/*
+ * Binary-search the tsvector's word entries for the query operand 'val'.
+ * When found, the item's weight mask is checked too if both the item has
+ * a weight and the word has positions; otherwise finding it is enough.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *chk = (CHKVAL *) checkval;
+   WordEntry  *lo = chk->arrb;
+   WordEntry  *hi = chk->arre;
+
+   /* the operand, if present, is always inside [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = ValCompare(chk, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else if (val->weight && mid->haspos)
+           return checkclass_str(chk, mid, val);
+       else
+           return true;
+   }
+
+   return (false);
+}
+
+/*
+ * check for boolean condition
+ */
+/*
+ * Evaluate the query tree rooted at 'curitem'.  Operands are tested with
+ * 'chkcond'.  For an operator the right operand is the next item and the
+ * left operand is at curitem + curitem->left.  With calcnot false, every
+ * '!' node evaluates to true without looking at its operand.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       if (TS_execute(curitem + 1, checkval, calcnot, chkcond))
+           return false;
+       return true;
+   }
+
+   if (curitem->val == (int4) '&')
+   {
+       /* the right side is evaluated only if the left side holds */
+       if (!TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+           return false;
+       return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+   }
+
+   /* |-operator: the right side is evaluated only if the left side fails */
+   if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+       return true;
+   return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+}
+
+/*
+ * boolean operations
+ */
+/*
+ * Same as exectsq() with the two arguments given in the opposite order.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(0);
+   Datum       second = PG_GETARG_DATUM(1);
+
+   return DirectFunctionCall2(exectsq, second, first);
+}
+
+/*
+ * Does the tsvector (argument 0) satisfy the query (argument 1)?
+ * Returns false if either of them is empty.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   CHKVAL      chkval;
+   bool        result;
+
+   if (!val->size || !query->size)
+   {
+       PG_FREE_IF_COPY(val, 0);
+       PG_FREE_IF_COPY(query, 1);
+       PG_RETURN_BOOL(false);
+   }
+
+   /* describe the tsvector's word array and both string areas */
+   chkval.arrb = ARRPTR(val);
+   chkval.arre = chkval.arrb + val->size;
+   chkval.values = STRPTR(val);
+   chkval.operand = GETOPERAND(query);
+   /* calcnot = true: '!' nodes are really evaluated */
+   result = TS_execute(
+                    GETQUERY(query),
+                    &chkval,
+                    true,
+                    checkcondition_str
+       );
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * find left operand in polish notation view
+ *
+ * Walks the item array starting at *pos and fills in the "left" field of
+ * every item of the subtree rooted there: 0 for values, 1 for '!', and for
+ * binary operators the distance from the operator to the operand that
+ * follows its first sub-expression.  On return *pos is one past the subtree.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+   int4        here = *pos;
+   ITEM       *node = ptr + here;
+
+#ifdef BS_DEBUG
+   elog(DEBUG3, (node->type == OPR) ?
+        "%d  %c" : "%d  %d ", here, node->val);
+#endif
+
+   /* every case consumes the current item */
+   (*pos)++;
+
+   if (node->type == VAL || node->type == VALTRUE)
+   {
+       node->left = 0;
+       return;
+   }
+
+   if (node->val == (int4) '!')
+   {
+       node->left = 1;
+       findoprnd(ptr, pos);
+       return;
+   }
+
+   /* binary operator: skip the first operand, record where the other starts */
+   findoprnd(ptr, pos);
+   node->left = *pos - here;
+   findoprnd(ptr, pos);
+}
+
+
+/*
+ * input
+ *
+ * Parse the query text in buf and build a QUERYTYPE from it.
+ *
+ * buf      - query text handed to the parser state
+ * pushval  - callback passed through to makepol()
+ * cfg_id   - stored in the parser state as state.cfg_id
+ *
+ * Raises ERROR "Empty query" when parsing produced no items.  The returned
+ * struct is palloc'd and laid out as header, ITEM array, operand text.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   /* (each list node in state.str is copied into the array and then freed) */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   /* NOTE(review): pbuf is a fixed 16384-byte buffer and sprintf is unbounded */
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * in without morphology
+ *
+ * Parses argument 0 with queryin(), using pushval_asis and cfg_id 0.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   char       *input = (char *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *parsed = queryin(input, pushval_asis, 0);
+
+   PG_RETURN_POINTER(parsed);
+}
+
+/*
+ * out function
+ *
+ * INFIX is the working state used by infix() while printing a query.
+ */
+typedef struct
+{
+   ITEM       *curpol;         /* next query item to print */
+   char       *buf;            /* start of the output buffer */
+   char       *cur;            /* current write position inside buf */
+   char       *op;             /* base of the operand text; items index it by distance */
+   int4        buflen;         /* allocated size of buf */
+}  INFIX;
+
+/*
+ * Make sure inf->buf can take addsize more bytes plus a terminator:
+ * doubles buflen and repallocs until it fits, then re-points inf->cur
+ * at the same offset in the (possibly moved) buffer.
+ */
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * in    - printing state; in->curpol is advanced past the printed subtree
+ *         and in->cur past the text appended to in->buf
+ * first - when false, an '|' expression is wrapped in "( " and " )"
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       /* operand: quoted text, optionally followed by ":" and weight letters */
+       char       *op = in->op + in->curpol->distance;
+
+       /* room for every char escaped, two quotes, and ':' plus four letters */
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           /* a quote inside the operand is preceded by a backslash */
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       if ( in->curpol->weight ) {
+           /* weight is a bitmask: bit 3 -> A, bit 2 -> B, bit 1 -> C, bit 0 -> D */
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       /* negation: "!" followed by the operand, parenthesized if it is an operator */
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       /* binary operator */
+       int4        op = in->curpol->val;
+       INFIX       nrm;
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       /*
+        * The operand stored first is printed into a separate scratch buffer
+        * so that it can be emitted after the other operand.
+        */
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Text output of a query: an empty query yields the empty string,
+ * anything else is rendered by infix() into a freshly palloc'd buffer.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       out;
+
+   if (query->size == 0)
+   {
+       char       *empty = palloc(1);
+
+       empty[0] = '\0';
+       PG_RETURN_POINTER(empty);
+   }
+
+   /* start with a small buffer; infix() grows it on demand */
+   out.buflen = 32;
+   out.buf = (char *) palloc(sizeof(char) * out.buflen);
+   out.cur = out.buf;
+   out.cur[0] = '\0';
+   out.curpol = GETQUERY(query);
+   out.op = GETOPERAND(query);
+
+   infix(&out, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(out.buf);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * Returns a text value: empty for an empty query, "T" when nothing is left
+ * after clean_NOT_v2() removed the negations, otherwise the infix rendering
+ * of the cleaned query.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+   text       *res;
+   ITEM       *q;
+   int4        len;
+
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       /* release the detoasted copy on this path too */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(res);
+   }
+
+   q = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (!q)
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+   else
+   {
+       int4        outlen;
+
+       nrm.curpol = q;
+       nrm.buflen = 32;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+       *(nrm.cur) = '\0';
+       nrm.op = GETOPERAND(query);
+       infix(&nrm, true);
+
+       /* copy the rendered text (without its terminator) into the result */
+       outlen = nrm.cur - nrm.buf;
+       res = (text *) palloc(outlen + VARHDRSZ);
+       VARATT_SIZEP(res) = outlen + VARHDRSZ;
+       memcpy(VARDATA(res), nrm.buf, outlen);
+
+       /* the scratch buffer is no longer needed once copied */
+       pfree(nrm.buf);
+       pfree(q);
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * Build a query from text (argument 1) under the configuration id given
+ * as argument 0: parse with pushval_morph, then pass the items through
+ * clean_fakeval_v2().  When nothing survives the cleaning, the result is
+ * turned into an empty query (header only, size 0).
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text       *in = PG_GETARG_TEXT_P(1);
+   char       *cstr = text2char(in);
+   QUERYTYPE  *query;
+   ITEM       *cleaned;
+   int4        nitems;
+
+   PG_FREE_IF_COPY(in,1);
+
+   query = queryin(cstr, pushval_morph, PG_GETARG_INT32(0));
+   cleaned = clean_fakeval_v2(GETQUERY(query), &nitems);
+
+   if (cleaned)
+   {
+       /* overwrite the leading items with the cleaned sequence */
+       memcpy((void *) GETQUERY(query), (void *) cleaned, nitems * sizeof(ITEM));
+       pfree(cleaned);
+   }
+   else
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+   }
+
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * to_tsquery with the configuration given by name (argument 0):
+ * resolves the name with name2id_cfg() and forwards the query text
+ * (argument 1) to to_tsquery().
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+   
+   /*
+    * name was fetched from argument 0, so that is the slot it must be
+    * compared against; comparing with slot 1 would pfree the caller's
+    * own datum whenever no detoasted copy had been made.
+    */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsquery using the configuration id returned by get_currcfg();
+ * the query text is argument 0.
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum       cfg = Int32GetDatum( get_currcfg() );
+   Datum       res = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));
+
+   PG_RETURN_DATUM(res);
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * item in polish notation with back link
+ * to left operand
+ */
+typedef struct ITEM
+{
+   int8        type;           /* one of the token codes below (VAL, OPR, VALTRUE, ...) */
+   int8        weight;         /* bitmask; printed as the letters A..D after ':' */
+   int2        left;           /* offset from this item to its other operand */
+   int4        val;            /* for OPR items the operator character ('!', '&', '|') */
+   /* user-friendly value, must correlate with WordEntry */
+   uint32  
+       unused:1,
+       length:11,              /* length of the operand text */
+       distance:20;            /* offset of the operand text from GETOPERAND() */
+}  ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ */
+typedef struct
+{
+   int4        len;            /* total size in bytes, as computed by COMPUTESIZE */
+   int4        size;           /* number of ITEMs */
+   char        data[1];
+}  QUERYTYPE;
+
+/* size of the (len)(size) header */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+/* NOTE(review): size, lenofoperand and x below are expanded without parentheses */
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + size * sizeof(ITEM) + lenofoperand )
+/* start of the ITEM array */
+#define GETQUERY(x)  (ITEM*)( (char*)(x)+HDRSIZEQT )
+/* start of the operand text, right after the ITEM array */
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)x)->size * sizeof(ITEM) )
+
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+/* token / item type codes */
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+/*
+ * Evaluate the query tree at curitem; chkcond decides each VAL item and
+ * calcnot selects whether '!' is evaluated or treated as true.
+ */
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevance ranking
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include <math.h>
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+/* SQL-callable entry points defined in this file */
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+/*
+ * Default weights, indexed by the weight field of a WordEntryPos.
+ * NOTE(review): presumably index 0 is class D and index 3 class A - confirm.
+ */
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+/* weight of a position; relies on a variable named w being in scope */
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+/* normalization method used when the caller does not supply one */
+#define DEF_NORM_METHOD    0
+
+/*
+ * Returns a weight of a word collocation
+ *
+ * w is the distance between two positions; anything above 100 gets the
+ * fixed tiny weight 1e-30, otherwise the weight falls off with exp(w/1.5-2).
+ */
+static float4 word_distance ( int4 w ) {
+   float4 gap = (float4)w;
+
+   return ( w>100 ) ? 1e-30 : 1.0/(1.005+0.05*exp( gap/1.5-2) );
+}
+
+/*
+ * Length of a tsvector for normalization purposes: the sum over all
+ * entries of POSDATALEN, counting an entry with no positions as 1.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry   *entry, *stop=(WordEntry*)STRPTR(t);
+   int total = 0;
+
+   for(entry=ARRPTR(t); entry < stop; entry++) {
+       int npos = POSDATALEN(t, entry);
+
+       total += ( npos == 0 ) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Order a tsvector entry against a query operand: shorter sorts first,
+ * equal lengths are decided by strncmp over that length.
+ * eval / qval are the string areas that ptr->pos / item->distance index.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+        if (ptr->len != item->length)
+                return (ptr->len > item->length) ? 1 : -1;
+
+        return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary search of the tsvector's entry array for the operand of a query
+ * item, using WordECompareITEM ordering.  Returns NULL when absent.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+        WordEntry  *lo = ARRPTR(t);
+        WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+        /* the candidate range is always [lo, hi) */
+        while (lo < hi)
+        {
+                WordEntry  *mid = lo + (hi - lo) / 2;
+                int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+                if (cmp < 0)
+                        lo = mid + 1;
+                else if (cmp > 0)
+                        hi = mid;
+                else
+                        return mid;
+        }
+
+        return NULL;
+}
+
+/*
+ * Stand-in position list for entries stored without positions.  Users
+ * overwrite the first element with a uint16 count (lengthof(POSNULL)-1)
+ * and treat POSNULL+1 as the single position.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank used when the query's root operator is '&'.
+ *
+ * For every pair of query operands found in the vector, every pair of
+ * their positions contributes sqrt(weight * weight * word_distance(dist));
+ * contributions are combined as 1 - (1-res)*(1-curw).
+ * Returns -1.0 when no pair contributed.
+ *
+ * w - weight table used by the wpos() macro
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   /* pos[i] is the position list of query item i, or NULL if not found */
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   memset(pos,0,sizeof(uint16*) * q->size);
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+
+       /* a position list is a uint16 count followed by the positions */
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       /* pair the current operand with every earlier operand that was found */
+       for( k=0; k<i; k++) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   /* zero distance only counts when one side has no real positions */
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw; 
+                       if ( !dist ) dist=MAXENTRYPOS;  
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res; 
+}
+
+/*
+ * Rank used when the query's root is not '&'.
+ *
+ * Every position of every query operand found in the vector contributes
+ * its weight; contributions are combined as 1 - (1-res)*(1-weight).
+ * Returns -1.0 when no operand was found.
+ *
+ * w - weight table used by the wpos() macro
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           /* no stored positions: use the single placeholder position */
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Rank a tsvector against a query.
+ *
+ * w      - weight table handed to calc_rank_and / calc_rank_or
+ * method - normalization: 0 none, 1 divide by log(length),
+ *          2 divide by length; anything else raises ERROR
+ *
+ * Returns 0.0 when either side is empty.  A negative result from the
+ * helpers (nothing matched) is replaced by 1e-20 before normalization.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+   int len;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   if ( res < 0 )
+       res = 1e-20;
+
+        switch(method) {
+       case 0: break;
+       case 1:
+           len = cnt_length(t);
+           /* log(1) is 0: leave the rank as is rather than divide by zero */
+           if ( len > 1 )
+               res /= log((float)len);
+           break;
+       case 2:
+           len = cnt_length(t);
+           if ( len > 0 )
+               res /= (float)len;
+           break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+        }
+
+   return res;
+}
+
+/*
+ * rank(weights float4[], tsvector, query [, method])
+ *
+ * The weight array must be one-dimensional with at least
+ * lengthof(weights) elements.  A negative element selects the built-in
+ * default for that slot; an element above 1.0 raises ERROR.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 ) 
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+        elog(ERROR,"Array of weight is too short");
+
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 ) 
+           elog(ERROR,"Weight out of range");
+   } 
+
+   /* the normalization method is an optional fourth argument */
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method); 
+       
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank(tsvector, query [, method]) using the built-in default weights.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   /* optional third argument overrides the default normalization */
+   int norm = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   float score = calc_rank(weights, txt, query, norm);
+
+   PG_FREE_IF_COPY(txt, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_FLOAT4(score);
+}
+
+
+/* one occurrence of a query operand in the document */
+typedef struct {
+   ITEM    *item;      /* the query item that occurs here */
+   int32   pos;        /* position of the occurrence */
+} DocRepresentation;
+
+/*
+ * qsort comparator ordering DocRepresentation entries by position.
+ * Equal positions compare as 0, as qsort requires of a consistent
+ * comparator (cmp(a,b) and cmp(b,a) must not both be positive).
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+/* window of DocRepresentation entries examined by checkcondition_DR */
+typedef struct {
+   DocRepresentation *doc;     /* first entry of the window */
+   int len;                    /* number of entries in the window */
+}  ChkDocR;
+
+/*
+ * TS_execute predicate: true when the query item val occurs among the
+ * entries of the ChkDocR window passed as checkval.
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *window = (ChkDocR*)checkval;
+   int k;
+
+   for(k=0; k < window->len; k++) {
+       if ( window->doc[k].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next cover in doc, scanning from index *pos.
+ *
+ * doc/len - occurrences of query operands
+ * pos     - in/out scan index into doc
+ * p, q    - out: first and last position of the cover found; on entry *q
+ *           is the end of the previous cover (used to detect a repeat)
+ *
+ * Returns true when a window [*p, *q] was found whose entries satisfy the
+ * query (TS_execute with NOT not calculated), false when nothing is left.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* q: the largest among the first occurrences (from *pos on) of each operand */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               } 
+               break;
+           } 
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   } 
+
+   /* p: the smallest among the last occurrences (back from lastpos) of each operand */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+ 
+   if ( *p<=*q ) {
+       /* check the query against the entries from f through doc[lastpos] */
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { 
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ 
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q); 
+   }
+ 
+   return false;
+}
+
+/*
+ * Build the list of occurrences of the query's operands in txt, sorted
+ * by position.  *doclen receives the number of entries.  Returns NULL
+ * (and frees the array) when no operand occurs in txt; otherwise the
+ * caller owns the palloc'd array.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           /* no stored positions: use the single placeholder position */
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until this operand's positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+   
+   if ( cur>0 ) {
+       if ( cur>1 ) 
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+   
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd(K int, tsvector, query [, method])
+ *
+ * Cover-based rank: each cover found by Cover() adds 1.0, or K/width when
+ * the cover is wider than K positions.  K <= 0 selects 4.  The optional
+ * method normalizes as in calc_rank (0 none, 1 by log(length), 2 by length).
+ * Returns 0.0 when no query operand occurs in the vector.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len,cur;
+   int doclen;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;    
+   while( Cover(doc, len, query, &cur, &p, &q) ) 
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+        switch(method) {
+       case 0: break;
+       case 1:
+           doclen = cnt_length(txt);
+           /* log(1) is 0: leave the rank as is rather than divide by zero */
+           if ( doclen > 1 )
+               res /= log((float)doclen);
+           break;
+       case 2:
+           doclen = cnt_length(txt);
+           if ( doclen > 0 )
+               res /= (float)doclen;
+           break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+        }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd(tsvector, query [, method]): calls rank_cd with K = -1 and,
+ * when no method was supplied, the default normalization.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum norm = ( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD);
+   Datum res = DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       norm
+   );
+
+   PG_RETURN_DATUM(res);
+}
+
+/**************debug*************/
+
+/* one word occurrence of the document, as printed by get_covers */
+typedef struct {
+   char    *w;         /* word text inside the tsvector's string area (not terminated) */
+   int2    len;        /* length of w */
+   int2    pos;        /* position of this occurrence */
+   int2    start;      /* number of the cover that starts here, 0 if none */
+   int2    finish;     /* number of the cover that ends here, 0 if none */
+} DocWord;
+
+/*
+ * qsort comparator ordering DocWord entries by position.
+ * Equal positions compare as 0, as qsort requires of a consistent
+ * comparator (cmp(a,b) and cmp(b,a) must not both be positive).
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers(tsvector, query) - debug helper.
+ *
+ * Returns the document's words in position order as text, with "{N "
+ * before the first word and "}N " after the last word of the N-th cover
+ * found by Cover().  Returns empty text when no query operand occurs in
+ * the vector; raises ERROR "No pos info" if any entry lacks positions.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences in the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+        dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size estimate */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark the first and last word of every cover */
+   /* NOTE(review): dwptr->pos is read before the dwptr-dw<dlen bound is tested */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       while(dwptr->pos < p && dwptr-dw<dlen)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr->pos < q+1 && dwptr-dw<dlen)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   /* the actual length is whatever was written */
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/* node of the pointer-based query tree built by maketree() */
+typedef struct NODE
+{
+   struct NODE *left;          /* operand at valnode + valnode->left; NULL for '!' and values */
+   struct NODE *right;         /* operand at valnode + 1; NULL for values */
+   ITEM       *valnode;        /* the query item this node stands for (not owned) */
+}  NODE;
+
+/*
+ * make query tree from plain view of query
+ *
+ * Builds a palloc'd NODE for the item at "in" and, for operators,
+ * recursively for its operands: the right child from in + 1 and, except
+ * for '!', the left child from in + in->left.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   node->valnode = in;
+   node->left = NULL;
+   node->right = NULL;
+
+   /* values are leaves */
+   if (in->type != OPR)
+       return node;
+
+   node->right = maketree(in + 1);
+   if (in->val == (int4) '!')
+       return node;
+
+   node->left = maketree(in + in->left);
+   return node;
+}
+
+/* growing ITEM array filled by plainnode() */
+typedef struct
+{
+   ITEM       *ptr;            /* the array */
+   int4        len;            /* allocated number of items */
+   int4        cur;            /* number of items written so far */
+}  PLAINTREE;
+
+/*
+ * Append the subtree at node to the flat array in state (this node first,
+ * then its right subtree, then its left subtree), recomputing the "left"
+ * offsets of operators, and free each NODE once it has been written.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   ITEM       *src = node->valnode;
+   int4        slot;
+
+   /* double the array when it is full */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+
+   slot = state->cur;
+   memcpy((void *) &(state->ptr[slot]), (void *) src, sizeof(ITEM));
+   state->cur++;
+
+   if (src->type != VAL)
+   {
+       if (src->val == (int4) '!')
+       {
+           state->ptr[slot].left = 1;
+           plainnode(state, node->right);
+       }
+       else
+       {
+           plainnode(state, node->right);
+           /* index through state->ptr again: recursion may have moved it */
+           state->ptr[slot].left = state->cur - slot;
+           plainnode(state, node->left);
+       }
+   }
+
+   pfree(node);
+}
+
+/*
+ * make plain view of tree from 'normal' view of tree
+ *
+ * Returns a palloc'd ITEM array with *len items, or NULL with *len = 0
+ * when root is NULL or is neither a value nor an operator.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   pl;
+
+   pl.ptr = NULL;
+   pl.len = 16;
+   pl.cur = 0;
+
+   if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
+       plainnode(&pl, root);
+   }
+
+   *len = pl.cur;
+   return pl.ptr;
+}
+
+/*
+ * Free a NODE tree; a NULL node is accepted and ignored.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive calls handle missing children themselves */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * clean tree for ! operator.
+ * It's useful for debug, but in
+ * other case, such view is used with search in index.
+ * Operator ! always return TRUE
+ *
+ * Returns the cleaned subtree, or NULL when the whole subtree was
+ * dropped; dropped nodes are freed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   /* a negation is dropped together with everything below it */
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* '|' is dropped as a whole as soon as either operand is dropped */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       /* '&' collapses to whichever operand survives */
+       NODE       *res = node;
+
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a flat copy of the query at ptr with the '!' sub-expressions
+ * removed by clean_NOT_intree(); *len receives the item count.
+ * NULL is returned when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *cleaned = clean_NOT_intree(maketree(ptr));
+
+   return plaintree(cleaned, len);
+}
+
+/* outcome codes reported by clean_fakeval_intree() through its result argument */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Clean query tree from values which is always in
+ * text (stopword)
+ *
+ * Returns the cleaned subtree, or NULL when the subtree reduced to a
+ * constant; in that case *result is set to V_TRUE or V_FALSE.  *result is
+ * left untouched when a subtree is returned.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       /* a stop word is a constant TRUE */
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       /* negation of a constant is the opposite constant */
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       /* TRUE on either side decides '|'; a FALSE side is simply dropped */
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       /* FALSE on either side decides '&'; a TRUE side is simply dropped */
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a flat copy of the query at ptr with constant (stop word)
+ * operands removed; *len receives the item count.  When the whole query
+ * reduces to a constant, a NOTICE is emitted, *len is set to 0 and NULL
+ * is returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *pruned = clean_fakeval_intree(maketree(ptr), &verdict);
+
+   if (verdict == V_UNKNOWN)
+       return plaintree(pruned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/*
+ * Ordering callback shared by qsort() and bsearch(): two SNMapEntry
+ * records are ordered by strcmp() of their key strings.
+ */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *lhs = (const SNMapEntry *) a;
+   const SNMapEntry *rhs = (const SNMapEntry *) b;
+
+   return strcmp(lhs->key, rhs->key);
+}
+
+/*
+ * Append the pair (key, value) to map and re-sort the entry array.
+ *
+ * The array grows by doubling (initial capacity 16) through realloc().
+ * The key is duplicated with strdup(), so the caller keeps ownership of
+ * its own copy.  Any allocation failure is reported with
+ * elog(ERROR, "No memory").  After the insert the whole array is sorted
+ * again with compareSNMapEntry so that findSNMap() can bsearch() it.
+ */
+void
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   /* Out of room: enlarge the backing array first. */
+   if ( map->len >= map->reallen ) {
+       int newcap = (map->reallen) ? 2*map->reallen : 16;
+       SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newcap);
+
+       if ( grown == NULL )
+           elog(ERROR, "No memory");
+       map->list = grown;
+       map->reallen = newcap;
+   }
+
+   /* Fill the first free slot. */
+   slot = &map->list[ map->len ];
+   slot->key = strdup(key);
+   if ( slot->key == NULL )
+       elog(ERROR, "No memory");
+   slot->value = value;
+   map->len++;
+
+   /* A single entry is trivially sorted. */
+   if ( map->len > 1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Variant of addSNMap() taking the key as a text value: the key is
+ * converted with text2char(), inserted, and the temporary C string is
+ * released with pfree().
+ */
+void
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *cstr = text2char( key );
+
+   addSNMap(map, cstr, value);
+   pfree(cstr);
+}
+
+/*
+ * Look key up in map with bsearch() and return the stored value.
+ * Returns 0 when the map is empty, has no entry array, or does not
+ * contain the key.
+ */
+Oid
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if ( !map->list || map->len == 0 )
+       return 0;
+
+   /* Only the key of the probe record is looked at by the comparator. */
+   probe.key = key;
+   probe.value = 0;
+
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( hit == NULL )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Variant of findSNMap() taking the key as a text value.
+ *
+ * The key is converted with text2char(), looked up, and the temporary
+ * C string is released with pfree().  Returns whatever findSNMap()
+ * returns (0 when the key is not found).
+ *
+ * The result is kept in an Oid rather than an int so that the value is
+ * never passed through a signed type on its way back to the caller.
+ */
+Oid
+findSNMap_t( SNMap *map, text *key ) {
+   char *k = text2char(key);
+   Oid res = findSNMap(map, k);
+
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release everything owned by map: each non-NULL key string, then the
+ * entry array itself.  The SNMap structure is zeroed afterwards, so it
+ * ends up with len 0, reallen 0 and no list.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *cur = map->list;
+
+       /* map->len doubles as the countdown of entries still to visit. */
+       for ( ; map->len; map->len--, cur++ ) {
+           if ( cur->key )
+               free(cur->key);
+       }
+       free( map->list );
+   }
+   memset(map, 0, sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One key/value pair of an SNMap. */
+typedef struct {
+   /* NUL-terminated key; addSNMap() stores a strdup() copy, freeSNMap() frees it */
+   char    *key;
+   /* value associated with the key; findSNMap() returns 0 for "not found" */
+   Oid value;
+} SNMapEntry;
+
+/*
+ * String-to-Oid map backed by one array of entries.  addSNMap() re-sorts
+ * the array by key after every insert and findSNMap() searches it with
+ * bsearch().
+ */
+typedef struct {
+   /* number of entries currently in use */
+   int len;
+   /* number of entries list has room for */
+   int reallen;
+   /* entry array, managed with realloc()/free() in snmap.c */
+   SNMapEntry  *list;
+} SNMap;
+
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate and initialise a stemmer environment.
+ *
+ *   S_size  number of extra strings to create in z->S (0 for none)
+ *   I_size  number of zero-initialised ints in z->I (0 for none)
+ *   B_size  number of zero-initialised symbols in z->B (0 for none)
+ *
+ * Returns the new environment, to be released with SN_close_env(), or a
+ * null pointer when one of the calloc() calls fails.  In the failure case
+ * everything allocated so far is released before returning.
+ *
+ * Each *_size field is stored only after its array has been allocated;
+ * SN_close_env() uses those fields to decide what to free, which is what
+ * makes it safe to call on a partially built environment.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (!z) return 0;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (!z->S) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (!z->I) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (!z->B) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    /* Sizes of arrays that were never allocated are still 0 here. */
+    SN_close_env(z);
+    return 0;
+}
+
+/*
+ * Release an environment obtained from SN_create_env().
+ *
+ * An array is freed only when its size field is non-zero; the strings in
+ * z->S and the main string z->p are released with lose_s().  z itself is
+ * freed last and must not be used afterwards.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    int i;
+
+    if (z->S_size != 0)
+    {
+        i = 0;
+        while (i < z->S_size)
+        {
+            lose_s(z->S[i]);
+            i++;
+        }
+        free(z->S);
+    }
+    if (z->I_size != 0)
+        free(z->I);
+    if (z->B_size != 0)
+        free(z->B);
+    if (z->p != 0)
+        lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Load a new word into the environment: the range 0 .. z->l of the
+ * current string is replaced, via replace_s(), by the size symbols at s,
+ * and the cursor z->c is then reset to 0.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    /* reset the cursor only after the replacement has been made */
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/*
+ * State of one stemmer instance; created by SN_create_env() and released
+ * by SN_close_env().
+ */
+struct SN_env {
+    /* the string being stemmed; allocated with create_s() */
+    symbol * p;
+    /*
+     * c: cursor; l: forward limit (code tests z->c >= z->l);
+     * lb: backward limit (code tests z->c <= z->lb);
+     * bra/ket: the two ends of the current slice, set at the "[" and "]"
+     * marks of the generated code.
+     * a: not referenced by the code visible here -- purpose to be confirmed.
+     */
+    int c; int a; int l; int lb; int bra; int ket;
+    /* element counts of the S, I and B arrays below (0 = not allocated) */
+    int S_size; int I_size; int B_size;
+    /* extra strings, each allocated with create_s() */
+    symbol * * S;
+    /* integer variables of the stemmer (english_stem.c keeps its region marks here) */
+    int * I;
+    /* flag variables of the stemmer (english_stem.c keeps Y_found in B[0]) */
+    symbol * B;
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/*
+ * Prelude: mark every 'y' that has to be treated specially by rewriting
+ * it to 'Y', and record in B[0] (Y_found) whether any was rewritten.
+ *
+ * Two cases are handled: a 'y' at the cursor that is followed by a
+ * character of grouping g_v, and any 'y' that follows a character of
+ * grouping g_v (g_v is presumably the vowel set -- confirm against the
+ * in_grouping() bitmap convention).  The cursor is restored before
+ * returning.  Always returns 1.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            /* scan forward for the next g_v character followed by 'y' */
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    /* lab1 is emitted by the generator but never jumped to */
+    lab1:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Compute the two region marks p1 (stored in I[0]) and p2 (stored in
+ * I[1]).
+ *
+ * Both marks start at the limit z->l.  p1 is set either right after a
+ * match of table a_0 ("gener") at the cursor, or after the first
+ * non-g_v character that follows a g_v character.  p2 is set by
+ * repeating the "g_v character, then non-g_v character" scan from p1.
+ * If the end of the word is reached during a scan, the marks not yet
+ * set keep their initial value z->l.  The cursor is restored before
+ * returning.  Always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Backward test for a "short" ending just before the cursor.
+ *
+ * Succeeds (returns 1) when, reading backwards, either
+ *   - a character outside g_v_WXY, then one inside g_v, then one outside
+ *     g_v are found, or
+ *   - a character outside g_v, then one inside g_v are found and the
+ *     cursor has then reached the backward limit z->lb.
+ * Returns 0 otherwise.  The cursor is left wherever the successful
+ * alternative stopped; callers in this file wrap the call in a test.
+ */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        /* first alternative failed: rewind and try the second one */
+        z->c = z->l - m;
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* Returns 1 when the cursor is at or after the p1 mark (I[0]), else 0. */
+static int r_R1(struct SN_env * z) {
+    return z->I[0] <= z->c;
+}
+
+/* Returns 1 when the cursor is at or after the p2 mark (I[1]), else 0. */
+static int r_R2(struct SN_env * z) {
+    return z->I[1] <= z->c;
+}
+
+/*
+ * Step 1a: handle the endings listed in table a_1.
+ *
+ *   "sses"        -> replaced by "ss"
+ *   "ied", "ies"  -> replaced by "ie" when exactly one character precedes
+ *                    the ending, otherwise by "i"
+ *   "s"           -> deleted, provided a g_v character occurs somewhere
+ *                    before the character that precedes the "s"
+ *   "ss", "us"    -> matched (table result -1) but left unchanged
+ *
+ * Returns 0 when no ending matches or a condition above fails, else 1.
+ */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1b: handle the endings listed in table a_3.
+ *
+ *   "eed", "eedly"                -> replaced by "ee" when the ending
+ *                                    lies in R1
+ *   "ed", "edly", "ing", "ingly"  -> deleted when a g_v character occurs
+ *                                    before the ending; the new ending is
+ *                                    then checked against table a_2:
+ *        "at", "bl", "iz"          -> an "e" is appended
+ *        a doubled letter of a_2   -> the last character is removed
+ *        anything else             -> an "e" is appended when the cursor
+ *                                    is at the p1 mark and r_shortv()
+ *                                    succeeds
+ *
+ * Returns 0 when nothing matches or a condition fails, else 1.
+ */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1c: replace a final 'y' or 'Y' by 'i' when it is preceded by a
+ * character outside g_v and that character is not the first one of the
+ * word (the cursor must still be above the backward limit z->lb).
+ * Returns 1 when the replacement was made, 0 otherwise.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/*
+ * Step 2: find the longest ending of table a_4 and, when it lies in R1,
+ * replace it by the shorter form selected by the table result (s_13 ..
+ * s_28).  Two results carry an extra condition: 13 ("ogi") requires a
+ * preceding 'l', and 16 ("li") requires a preceding character of
+ * g_valid_LI and deletes the ending instead of replacing it.
+ * Returns 0 when nothing matches or a condition fails, else 1.
+ */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 3: find the longest ending of table a_5 and, when it lies in R1,
+ * replace it (results 1-4), delete it (result 5: "ful", "ness"), or
+ * delete it only if it also lies in R2 (result 6: "ative").
+ * Returns 0 when nothing matches or a condition fails, else 1.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 4: find the longest ending of table a_6 and delete it when it
+ * lies in R2.  The ending "ion" (result 2) is deleted only when it is
+ * preceded by 's' or 't'.
+ * Returns 0 when nothing matches or a condition fails, else 1.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 5: handle a final 'e' or 'l' (table a_7).
+ *
+ *   'e' -> deleted when it lies in R2, or when it lies in R1 and
+ *          r_shortv() fails for the text before it
+ *   'l' -> deleted when it lies in R2 and is preceded by another 'l'
+ *
+ * Returns 0 when nothing matches or a condition fails, else 1.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Second exception list: returns 1 when, reading backwards from the
+ * cursor, one of the words of table a_8 is matched and the match reaches
+ * the backward limit z->lb (i.e. it covers everything down to z->lb);
+ * returns 0 otherwise.  english_stem() skips steps 1b-5 for such words.
+ */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (!(find_among_b(z, a_8, 8))) return 0; /* substring, line 147 */
+    z->bra = z->c; /* ], line 147 */
+    if (z->c > z->lb) return 0; /* atlimit, line 147 */
+    return 1;
+}
+
+/*
+ * First exception list: words of table a_9 that get a fixed result.
+ *
+ * The match is made forwards from the cursor and must reach the limit
+ * z->l.  Entries with a positive table result are replaced by the fixed
+ * stem s_36 .. s_46 (for example "skis" -> "ski", "dying" -> "die",
+ * "only" -> "onli"); entries with result -1 (for example "news", "sky")
+ * are accepted unchanged.
+ * Returns 1 when the word was recognised, 0 otherwise.
+ */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Postlude: undo the marking done by r_prelude().  When B[0] (Y_found)
+ * is set, every 'Y' from the cursor to the limit is turned back into
+ * 'y'; the cursor is restored afterwards.
+ * Returns 0 without doing anything when B[0] is not set, else 1.
+ */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        /* scan forward for the next 'Y' */
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+/*
+ * Entry point of the English stemmer: stems the word held in z in place.
+ *
+ * Order of processing:
+ *   1. r_exception1() -- words with a fixed result; if it succeeds,
+ *      nothing else is done.
+ *   2. Words with fewer than 3 characters after the cursor are left
+ *      untouched and 0 is returned.
+ *   3. r_prelude() and r_mark_regions(), both run forwards.
+ *   4. Backward mode (z->lb = z->c, z->c = z->l): r_Step_1a(), then
+ *      either r_exception2() succeeds or steps 1b, 1c, 2, 3, 4 and 5 run
+ *      in that order.  Each step is optional: its failure is ignored and
+ *      the cursor is restored after it.
+ *   5. Back in forward mode, r_postlude().
+ *
+ * Returns 1 in every case except the short-word case of item 2.
+ */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        /* leave backward mode: the cursor goes back to where it started */
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/*
+ * Create an environment sized for this stemmer: no extra strings, two
+ * integers (I[0] and I[1], the p1 and p2 marks) and one flag (B[0],
+ * Y_found).  Returns whatever SN_create_env() returns.
+ */
+extern struct SN_env * english_create_env(void) { return SN_create_env(0, 2, 1); }
+
+/* Release an environment obtained from english_create_env(). */
+extern void english_close_env(struct SN_env * z) { SN_close_env(z); }
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create and release the environment the English stemmer works on. */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/* Entry point of the English stemmer; operates on the environment z. */
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+/* Aliases for the <limits.h> bounds of int. */
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Number of bytes reserved in front of every symbol buffer for its two int
+   header words (see CAPACITY and SIZE below).  Parenthesized so the macro
+   expands safely inside any larger expression. */
+#define HEAD (2*sizeof(int))
+
+/* SIZE(p) is the int stored immediately before the buffer p, CAPACITY(p) the
+   int before that.  SET_SIZE parenthesizes both its argument and the whole
+   assignment so that expressions such as SET_SIZE(p, a ? b : c) behave as
+   written. */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    ((int *)(p))[-2]
+
+/* One entry of a string table searched by find_among() and find_among_b(). */
+struct among
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    /* Optional extra condition: when non-null, find_among()/find_among_b()
+       call it after the string matched and only report `result` if it
+       returns non-zero; otherwise they fall back to entry substring_i. */
+    int (* function)(struct SN_env *);
+};
+
+/* Allocation and release of symbol buffers (implemented in utilities.c). */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+/* Character-class tests against the bitmap s covering codes min..max.
+   The plain forms look at the character at the cursor and advance it on
+   success; the _b forms look at the character before the cursor and move
+   the cursor back on success.  All return 1 on success and 0 otherwise. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+/* Same as above, but testing against the plain code range min..max. */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+/* Literal comparison at the cursor (forward) or ending at the cursor (_b);
+   the _v forms take the length from SIZE(p). */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+/* Table lookup of the text at the cursor; return the matching entry's
+   `result`, or 0 when nothing matches. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+/* Buffer growth and in-place replacement of the slice z->bra..z->ket. */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+/* Replacement of an arbitrary range bra..ket, keeping z->bra/z->ket in step. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+/* Copy the current slice, or the text up to z->l, into the buffer p. */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+/* Print the buffer with cursor/limit/slice markers to stdout. */
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point of the Russian stemmer. */
+extern int russian_stem(struct SN_env * z);
+/* File-local routines, one per step of the stemmer; each returns 1 on
+   success and 0 on failure. */
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Environment constructor / destructor, defined at the end of this file. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Ending strings for table a_0, searched backwards by r_perfective_gerund().
+   NOTE(review): the byte values look like KOI8-R encoded Cyrillic -- confirm. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+/* Each entry is { s_size, s, substring_i, result, function } (struct among). */
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Ending strings for table a_1, searched backwards by r_adjective(). */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+/* Every entry carries result 1 and no substring link or condition function. */
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Ending strings for table a_2, searched backwards by r_adjectival() after
+   r_adjective() has succeeded. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+/* Each entry is { s_size, s, substring_i, result, function } (struct among). */
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* Ending strings for table a_3, searched backwards by r_reflexive(). */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Ending strings for table a_4, searched backwards by r_verb(). */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+/* Each entry is { s_size, s, substring_i, result, function } (struct among).
+   r_verb() requires a specific preceding character for result 1 and deletes
+   unconditionally for result 2. */
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Ending strings for table a_5, searched backwards by r_noun(). */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+/* Every entry carries result 1; substring_i links a longer ending to the
+   shorter ending it contains. */
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* Ending strings for table a_6, searched backwards by r_derivational(). */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Ending strings for table a_7, searched backwards by r_tidy_up(); the
+   result field (1, 2 or 3) selects the clean-up action taken there. */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Bitmap of the character grouping tested by r_mark_regions(); it is passed
+   to in_grouping()/out_grouping() together with the code range 192..220.
+   NOTE(review): presumably the set of vowels -- confirm against the
+   Snowball source of the stemmer. */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* Single-symbol literals compared with eq_s_b() in the routines below. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the two region marks kept in z->I[0] and z->I[1].
+   Both marks start out at z->l.  Scanning forward from the cursor, I[0] is
+   set just past the first character that belongs to grouping g_v; I[1] is
+   set after additionally passing, in turn, a character outside g_v, one
+   inside g_v and one outside g_v.  If the input is exhausted first, the
+   marks not yet reached keep the value z->l.  The cursor is restored before
+   returning and the result is always 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 100 */
+        while(1) { /* gopast, line 101 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+            break;
+        lab1:
+            /* no match here: give up at end of input, else step forward */
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[0] = z->c; /* setmark pV, line 101 */
+        while(1) { /* gopast, line 101 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+            break;
+        lab2:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+            break;
+        lab3:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+            break;
+        lab4:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 102 */
+    lab0:
+        /* restore the cursor saved on entry to the "do" block */
+        z->c = c;
+    }
+    return 1;
+}
+
+/* Region test: yields 1 when the cursor is at or beyond the mark stored in
+   z->I[1], and 0 otherwise.  The environment is not modified. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/* Try to remove an ending listed in table a_0.
+   The ending is located backwards from the cursor and becomes the slice
+   z->bra..z->ket.  Endings with result 1 are deleted only when the symbol
+   just before them equals s_0 or s_1; endings with result 2 are deleted
+   unconditionally.  Returns 1 when something was deleted, 0 otherwise. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 111 */
+    among_var = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 111 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 115 */
+                if (!(eq_s_b(z, 1, s_0))) goto lab1;
+                goto lab0;
+            lab1:
+                /* first alternative failed: rewind and try the second */
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_1))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 115 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 122 */
+            break;
+    }
+    return 1;
+}
+
+/* Look backwards from the cursor for an ending listed in table a_1 and
+   delete it.  Returns 0 when no ending matches, 1 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int hit;
+    z->ket = z->c; /* [, line 127 */
+    hit = find_among_b(z, a_1, 26); /* substring, line 127 */
+    if (hit == 0) return 0;
+    z->bra = z->c; /* ], line 127 */
+    if (hit == 1) {
+        slice_del(z); /* delete, line 136 */
+    }
+    return 1;
+}
+
+/* Remove an a_1 ending via r_adjective() and then, optionally, a further
+   ending from table a_2.
+   Returns 0 when r_adjective() fails; otherwise 1, whether or not the
+   optional second removal took place.  For a_2 entries with result 1 the
+   removal additionally requires the preceding symbol to equal s_2 or s_3. */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m; goto lab0; }
+            case 1:
+                /* note: this inner m shadows the m of the enclosing "try" */
+                {   int m = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m;
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Look backwards from the cursor for one of the two endings in table a_3
+   and delete it.  Returns 0 when neither matches, 1 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int hit;
+    z->ket = z->c; /* [, line 168 */
+    hit = find_among_b(z, a_3, 2); /* substring, line 168 */
+    if (hit == 0) return 0;
+    z->bra = z->c; /* ], line 168 */
+    if (hit == 1) {
+        slice_del(z); /* delete, line 171 */
+    }
+    return 1;
+}
+
+/* Try to remove an ending listed in table a_4.
+   Endings with result 1 are deleted only when the symbol just before them
+   equals s_4 or s_5; endings with result 2 are deleted unconditionally.
+   Returns 1 when something was deleted, 0 otherwise. */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 176 */
+    among_var = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 176 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 182 */
+                if (!(eq_s_b(z, 1, s_4))) goto lab1;
+                goto lab0;
+            lab1:
+                /* first alternative failed: rewind and try the second */
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_5))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 182 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 190 */
+            break;
+    }
+    return 1;
+}
+
+/* Look backwards from the cursor for an ending listed in table a_5 and
+   delete it.  Returns 0 when no ending matches, 1 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int hit;
+    z->ket = z->c; /* [, line 199 */
+    hit = find_among_b(z, a_5, 36); /* substring, line 199 */
+    if (hit == 0) return 0;
+    z->bra = z->c; /* ], line 199 */
+    if (hit == 1) {
+        slice_del(z); /* delete, line 206 */
+    }
+    return 1;
+}
+
+/* Look backwards from the cursor for an ending listed in table a_6 and
+   delete it, provided the ending starts at or after the mark z->I[1]
+   (checked through r_R2()).  Returns 0 when no ending matches or the
+   region test fails, 1 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int hit;
+    z->ket = z->c; /* [, line 215 */
+    hit = find_among_b(z, a_6, 2); /* substring, line 215 */
+    if (hit == 0) return 0;
+    z->bra = z->c; /* ], line 215 */
+    if (r_R2(z) == 0) return 0; /* call R2, line 215 */
+    if (hit == 1) {
+        slice_del(z); /* delete, line 218 */
+    }
+    return 1;
+}
+
+/* Final clean-up driven by table a_7.
+   result 1: delete the matched ending, then, if the two symbols before it
+             are both s_6/s_7 (the same code, 206), delete the last of them;
+   result 2: delete the matched symbol when it is preceded by s_8;
+   result 3: delete the matched symbol.
+   Returns 0 when nothing matches or a required preceding symbol is
+   missing (any deletion already made is kept), 1 otherwise. */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 223 */
+    among_var = find_among_b(z, a_7, 4); /* substring, line 223 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 223 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 227 */
+            z->ket = z->c; /* [, line 228 */
+            if (!(eq_s_b(z, 1, s_6))) return 0;
+            z->bra = z->c; /* ], line 228 */
+            if (!(eq_s_b(z, 1, s_7))) return 0;
+            slice_del(z); /* delete, line 228 */
+            break;
+        case 2:
+            if (!(eq_s_b(z, 1, s_8))) return 0;
+            slice_del(z); /* delete, line 231 */
+            break;
+        case 3:
+            slice_del(z); /* delete, line 233 */
+            break;
+    }
+    return 1;
+}
+
+/* Entry point of the Russian stemmer.
+   Steps, all working backwards from the end of the word and never before
+   the mark z->I[0]:
+     1. r_mark_regions() computes z->I[0] and z->I[1];
+     2. remove a perfective-gerund ending, or else optionally a reflexive
+        ending followed by an adjectival, verb or noun ending;
+     3. optionally remove a final symbol equal to s_9;
+     4. r_derivational(), then r_tidy_up().
+   Returns 1 on normal completion; returns 0 early if the cursor would lie
+   before z->I[0] when the backward limit is set. */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        /* m3 remembers the old backward limit; it is put back at the end */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            /* end of the "do": the cursor goes back to where it started */
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3;
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Build the environment used by the Russian stemmer by delegating to
+   SN_create_env(0, 2, 0).  The stemmer uses the two integer slots z->I[0]
+   and z->I[1].  NOTE(review): the three arguments are presumably the
+   numbers of string, integer and boolean variables -- confirm in api.h. */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Dispose of an environment obtained from russian_create_env() by handing
+   it to SN_close_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create and release the environment the Russian stemmer works on. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Entry point of the Russian stemmer; operates on the environment z. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* unless(C) stmt  runs stmt when C is false, i.e. it reads as "if not C". */
+#define unless(C) if(!(C))
+
+/* Capacity, in symbols, of a buffer freshly made by create_s(). */
+#define CREATE_SIZE 1
+
+/* Allocate a fresh symbol buffer.
+ *
+ * The block obtained from malloc() begins with HEAD bytes of header, and the
+ * returned pointer addresses the first symbol after that header, so that
+ * CAPACITY(p) and SIZE(p) can be read at negative offsets.  Both are
+ * initialised to CREATE_SIZE.  Release the buffer with lose_s().
+ *
+ * On allocation failure a message is written to stderr and the process
+ * exits with status 1 (the same fatal-error convention as slice_check()),
+ * instead of offsetting and writing through a null pointer.
+ */
+extern symbol * create_s(void)
+{   char * mem = (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    symbol * p;
+    if (mem == NULL)
+    {
+        fprintf(stderr, "out of memory in create_s\n");
+        exit(1);
+    }
+    p = (symbol *) (mem + HEAD);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Release a buffer made by create_s() or increase_size().  The pointer
+   handed to free() is the start of the header, HEAD bytes before p. */
+extern void lose_s(symbol * p)
+{
+    char * block_start = (char *) p - HEAD;
+    free(block_start);
+}
+
+/* Forward grouping test.  Succeeds when the character at the cursor has a
+   code in min..max and its bit (numbered from min) is set in the bitmap s;
+   the cursor then advances by one and 1 is returned.  Otherwise, or at the
+   forward limit z->l, returns 0 and leaves the cursor alone. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward grouping test.  Succeeds when the character just before the
+   cursor has a code in min..max and its bit (numbered from min) is set in
+   the bitmap s; the cursor then moves back by one and 1 is returned.
+   Otherwise, or at the backward limit z->lb, returns 0. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward complement of in_grouping().  Fails (returns 0) at the forward
+   limit or when the character at the cursor is a member of the grouping;
+   otherwise advances the cursor by one and returns 1. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max && ch >= min)
+    {   /* inside the covered range: consult the bitmap */
+        ch -= min;
+        if ((s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward complement of in_grouping_b().  Fails (returns 0) at the
+   backward limit or when the character before the cursor is a member of
+   the grouping; otherwise moves the cursor back by one and returns 1. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max && ch >= min)
+    {   /* inside the covered range: consult the bitmap */
+        ch -= min;
+        if ((s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward range test: when the character at the cursor lies in min..max,
+   advance the cursor by one and return 1; otherwise (or at the forward
+   limit) return 0. */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (!(min <= ch && ch <= max)) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward range test: when the character before the cursor lies in
+   min..max, move the cursor back by one and return 1; otherwise (or at the
+   backward limit) return 0. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (!(min <= ch && ch <= max)) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward complement of in_range(): when the character at the cursor lies
+   outside min..max, advance the cursor by one and return 1; otherwise (or
+   at the forward limit) return 0. */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (min <= ch && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward complement of in_range_b(): when the character before the
+   cursor lies outside min..max, move the cursor back by one and return 1;
+   otherwise (or at the backward limit) return 0. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (min <= ch && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward literal match: when the s_size symbols at the cursor equal s,
+   advance the cursor past them and return 1; return 0 when fewer than
+   s_size symbols remain before z->l or the symbols differ. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->l - z->c < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward literal match: when the s_size symbols ending at the cursor
+   equal s, move the cursor back over them and return 1; return 0 when
+   fewer than s_size symbols lie between z->lb and the cursor or the
+   symbols differ. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->c - z->lb < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward match against the whole buffer p; its length is read from
+   SIZE(p) and the work is done by eq_s(). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int length = SIZE(p);
+    return eq_s(z, length, p);
+}
+
+/* Backward match against the whole buffer p; its length is read from
+   SIZE(p) and the work is done by eq_s_b(). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int length = SIZE(p);
+    return eq_s_b(z, length, p);
+}
+
+/* Search the table v (v_size entries) for an entry whose string matches
+   the text starting at the cursor.
+
+   The first loop is a binary search over v; common_i and common_j record
+   how many leading symbols are already known to agree with the entries at
+   the lower and upper ends of the search interval, so each comparison can
+   resume from the smaller of the two.  The second loop starts at the entry
+   found and follows the substring_i links to shorter candidates.
+
+   On success the cursor is placed just after the matched string and the
+   entry's `result` is returned; an entry with a non-null `function` only
+   counts as matched if that function returns non-zero.  Returns 0 when no
+   entry matches. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   /* input exhausted before the entry: treat input as smaller */
+                if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        /* the entry matches only if all of its symbols agreed */
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* put the cursor back in case the function moved it */
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/* Search the table v (v_size entries) for an entry whose string matches
+   the text ending at the cursor, comparing from the last symbol towards
+   the first and never reading before the backward limit z->lb.
+
+   On success the cursor is placed just before the matched string and the
+   entry's `result` is returned; an entry with a non-null `function` only
+   counts as matched if that function returns non-zero.  Returns 0 when no
+   entry matches. */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   /* reached the backward limit: treat input as smaller */
+                if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            /* go round once more so that entry 0 gets inspected */
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        /* the entry matches only if all of its symbols agreed */
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* put the cursor back in case the function moved it */
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Replace the buffer p by a larger one able to hold n + 20 symbols.
+ *
+ * CAPACITY(p) symbols are copied into the new buffer, the SIZE header is
+ * carried over so the result never has an uninitialised size word, and the
+ * old buffer is released with lose_s().  Returns the new buffer; p must not
+ * be used afterwards.
+ *
+ * On allocation failure a message is written to stderr and the process
+ * exits with status 1 (the same fatal-error convention as slice_check()),
+ * instead of offsetting and writing through a null pointer.
+ */
+extern symbol * increase_size(symbol * p, int n)
+{   int new_size = n + 20;
+    char * mem = (char *) malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    symbol * q;
+    if (mem == NULL)
+    {
+        fprintf(stderr, "out of memory in increase_size\n");
+        exit(1);
+    }
+    q = (symbol *) (mem + HEAD);
+    CAPACITY(q) = new_size;
+    SET_SIZE(q, SIZE(p));
+    memmove(q, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return q;
+}
+
+/* to replace symbols between c_bra and c_ket in z->p by the
+   s_size symbols at s
+
+   Returns the change in length (s_size minus the number of symbols
+   replaced).  When the length changes, the buffer is grown if necessary
+   (z->p may then point to a new buffer), the tail after c_ket is shifted,
+   and SIZE(z->p), z->l and the cursor z->c are adjusted.  s is not read
+   when s_size is 0.
+*/
+
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{   int adjustment = s_size - (c_ket - c_bra);
+    int len = SIZE(z->p);
+    if (adjustment != 0)
+    {   if (adjustment + len > CAPACITY(z->p)) z->p = increase_size(z->p, adjustment + len);
+        /* move everything after the replaced range to its new position */
+        memmove(z->p + c_ket + adjustment, z->p + c_ket, (len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, adjustment + len);
+        z->l += adjustment;
+        /* a cursor after the range shifts with the tail; a cursor inside
+           the range is moved to its start */
+        if (z->c >= c_ket) z->c += adjustment; else
+            if (z->c > c_bra) z->c = c_bra;
+    }
+    unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return adjustment;
+}
+
+/* Sanity check run before every slice operation: the slice marks must
+   satisfy 0 <= bra <= ket <= l <= SIZE(z->p).  If they do not, a message
+   and a dump of the buffer are printed and the process exits with
+   status 1. */
+static void slice_check(struct SN_env * z)
+{
+    if (z->bra < 0 ||
+        z->bra > z->ket ||
+        z->ket > z->l ||
+        z->l > SIZE(z->p))   /* this last test could be removed */
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+/* Replace the current slice z->bra..z->ket by the s_size symbols at s,
+   after validating the slice marks with slice_check(). */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the whole contents of the buffer p, whose
+   length is read from SIZE(p). */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int length = SIZE(p);
+    slice_from_s(z, length, p);
+}
+
+/* Delete the current slice, i.e. replace it by an empty string. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, 0);   /* zero symbols, null source */
+}
+
+/* Replace the range bra..ket by the s_size symbols at s and shift the
+   slice marks z->bra and z->ket by the resulting change in length when
+   they lie at or after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* Same as insert_s(), taking the replacement text and its length from the
+   buffer p. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice z->bra..z->ket into the buffer p, growing p when
+   its capacity is too small, and set SIZE(p) to the slice length.  Returns
+   the (possibly reallocated) buffer; the slice marks are validated first
+   with slice_check(). */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int count;
+
+    slice_check(z);
+    count = z->ket - z->bra;
+    if (CAPACITY(p) < count) p = increase_size(p, count);
+    memmove(p, z->p + z->bra, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into the buffer p, growing p when
+   its capacity is too small, and set SIZE(p) accordingly.  Returns the
+   (possibly reallocated) buffer. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int count = z->l;
+
+    if (CAPACITY(p) < count)
+    {
+        p = increase_size(p, count);
+    }
+    memmove(p, z->p, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/* Print the contents of z->p to stdout with position markers:
+   '{' at z->lb, '[' at z->bra, '|' at the cursor z->c, ']' at z->ket and
+   '}' at z->l.  Zero symbols are shown as '#'.  When number is >= 0 the
+   line is prefixed with number, line_count and the buffer size; when it is
+   negative no prefix (and hence no opening quote) is printed, although the
+   closing quote still is. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{   int i;
+    int limit = SIZE(z->p);
+    /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+    /* i runs to limit inclusive so that markers sitting at the very end of
+       the buffer are printed too */
+    for (i = 0; i <= limit; i++)
+    {   if (z->lb == i) printf("{");
+        if (z->bra == i) printf("[");
+        if (z->c == i) printf("|");
+        if (z->ket == i) printf("]");
+        if (z->l == i) printf("}");
+        if (i < limit)
+        {   int ch = z->p[i];
+            if (ch == 0) ch = '#';
+            printf("%c", ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatypes.  Turn off echoing so that the expected file
+-- does not depend on the contents of tsearch2.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Convert a NUL-terminated string to lower case in place, using tolower()
+ * on each byte, and return the same pointer.
+ */
+char*
+lowerstr(char *str) {
+   unsigned char *cur;
+
+   for (cur = (unsigned char *) str; *cur != '\0'; cur++)
+       *cur = tolower(*cur);
+   return str;
+}
+
+/*
+ * Release every string held in s->stop, then the array itself, and finally
+ * zero the whole StopList (including its wordop callback).
+ *
+ * The array is walked by s->len only: it is not NULL-terminated, so the
+ * entries must never be read past the stored length.  The array is freed
+ * exactly once, after all of its elements.
+ */
+void
+freestoplist(StopList *s) {
+   if ( s->stop ) {
+       int i;
+
+       for(i=0; i<s->len; i++)
+           if ( s->stop[i] )
+               free(s->stop[i]);
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Read a stop-word file, one word per line, into s.
+ *
+ * in   - text value holding the file name; NULL or empty means "no list",
+ *        in which case s ends up with len 0 and stop NULL.
+ * s    - list to fill.  s->wordop, when set, is applied to every word read
+ *        and its result is what gets stored.
+ *
+ * Empty lines are skipped.  Raises ERROR if the file cannot be opened or
+ * memory runs out.  The words and the array are malloc'ed.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t  buflen=strlen(buf);
+
+           /*
+            * Drop the line terminator only when there is one: the last
+            * line of a file may lack it, and must keep its final letter.
+            */
+           if ( buflen>0 && buf[buflen-1]=='\n' )
+               buf[buflen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /* words read so far are reachable only through the local array */
+                   while ( s->len>0 )
+                       free(stop[--(s->len)]);
+                   free(stop);
+                   freestoplist(s);
+                   fclose(hin);
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               /* same cleanup as above: entries 0..len-1 are valid */
+               while ( s->len>0 )
+                   free(stop[--(s->len)]);
+               free(stop);
+               freestoplist(s);
+               fclose(hin);
+               elog(ERROR,"Not enough memory");
+           }
+           if ( s->wordop )
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++;
+       }
+       fclose(hin);
+       pfree(filename);
+   }
+   s->stop=stop;
+}
+
+/*
+ * qsort()/bsearch() comparator for arrays of C strings: a and b point at
+ * the array elements (char*), which are compared with strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   const char *left = *(char * const *) a;
+   const char *right = *(char * const *) b;
+
+   return strcmp(left, right);
+}
+
+/*
+ * Sort the words of s with comparestr() so that searchstoplist() can use
+ * bsearch().  An empty list is left untouched.
+ */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true if key is found in the stop list.  When s->wordop is set it
+ * is applied to key first (even if the list is empty).  The lookup uses
+ * bsearch() with comparestr().
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop )
+       key=(*(s->wordop))(key);
+   if ( !s->stop || s->len<=0 )
+       return false;
+   return ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) != NULL ) ? true : false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <locale.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the configuration whose pg_ts_cfg oid is id.
+ *
+ * Step 1 reads the parser name of the configuration.  Step 2 reads, for
+ * every token type of that parser, the array of dictionary names mapped to
+ * it; rows arrive ordered by token id descending, so the first row fixes
+ * the size of cfg->map (highest token id + 1).  Names are copied into
+ * TopMemoryContext so they survive SPI_finish(); after SPI is closed they
+ * are translated to ids with name2id_prs()/name2id_dict() and freed.
+ *
+ * cfg->map and each dict_id array are malloc'ed.  Errors are raised through
+ * ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer(
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
+       );
+       /* a NULL parser name would be dereferenced by ptextdup() below */
+       if ( isnull )
+           ts_error(ERROR, "Null parser name for tsearch cfg with id %d", id);
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+
+       cfg->id=id;
+   } else
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second query takes (parser name, cfg oid) */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       /* isnull is overwritten here: from now on it describes dict_name */
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       if ( !cfg->map ) {
+           /* first row carries the highest token id (order by ... desc) */
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* keep copies of the names beyond SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       }
+       MemoryContextSwitchTo(oldcontext);
+
+       /* free the detoasted copy, if one was made */
+       if ( a != toasted_a )
+           pfree(a);
+   }
+
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every stored dictionary name by the dictionary's id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+typedef struct {                   /* per-backend cache of loaded configurations */
+   TSCfgInfo   *last_cfg;          /* entry returned by the latest findcfg() lookup, or NULL */
+   int     len;                    /* number of entries in use in list */
+   int     reallen;                /* number of entries allocated for list */
+   TSCfgInfo   *list;              /* realloc'ed array, kept sorted by id (see findcfg) */
+   SNMap       name2id_map;        /* name -> oid cache used by name2id_cfg() */
+} CFGList;
+
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};  /* the cache itself, initially empty */
+
+/*
+ * Drop the whole configuration cache: the name->id map, every per-token
+ * dictionary array, every map and the list itself, then zero CList.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ *
+ * Oid is unsigned, so the ids are compared explicitly: returning their
+ * difference converted to int yields the wrong sign once two ids are more
+ * than INT_MAX apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached configuration with the given id, loading it with
+ * init_cfg() on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search of the
+ * sorted cache, then a fresh load appended to the cache and sorted in.
+ * The returned pointer refers into CList.list, which is re-sorted and may
+ * be reallocated by later calls that load another configuration.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo key;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   key.id=id;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /*
+    * last chance: load it.  last_cfg stays NULL until the new entry is
+    * complete, so an error raised inside init_cfg() (or a realloc that
+    * moves the list) cannot leave it pointing at a half-initialised or
+    * freed slot.
+    */
+   CList.last_cfg = NULL;
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp )
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+   init_cfg(id, &(CList.list[CList.len]));
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+
+   /* qsort changed the order, so locate the new entry again */
+   CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return CList.last_cfg;
+}
+
+
+/*
+ * Translate a configuration name (pg_ts_cfg.ts_name) to its oid.
+ * Results are remembered in CList.name2id_map, so the catalog is queried
+ * only the first time a name is seen.  Raises ERROR when the name is
+ * unknown or the query fails.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid     argtypes[1]={ TEXTOID };
+   Datum   values[1]={ PointerGetDatum(name) };
+   bool    isnull;
+   int     rc;
+   Oid     cfgid;
+
+   /* fast path: name already resolved */
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid )
+       return cfgid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed <= 0 )
+       elog(ERROR, "No tsearch config");
+
+   cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull )
+       elog(ERROR, "Null id for tsearch config");
+
+   SPI_finish();
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Split buf (buflen bytes) into tokens with the parser of cfg, normalise
+ * each token with the dictionaries mapped to its type, and append the
+ * resulting lexemes to prs->words.
+ *
+ * For a token the mapped dictionaries are tried in order; the first one
+ * that returns a non-NULL array ends the search, even if the array is
+ * empty.  prs->pos is advanced once per recognised token and stored
+ * (through LIMITPOS) with every lexeme of that token.  The lexeme strings
+ * returned by the dictionary are stored as-is in prs->words; only the
+ * array holding them is freed here.
+ *
+ * Raises ERROR for a token of MAXSTRLEN bytes or more.
+ */
+void
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)  /* a length, not a pointer */
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token (buflen bytes at buf, parser token type `type') to
+ * prs->words, doubling the array as needed.  The token text is copied into
+ * palloc'ed memory; all other fields of the new entry start out as zero.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;
+}
+
+/*
+ * Link the most recently added word of prs to every query operand equal to
+ * the lexeme buf (buflen bytes).
+ *
+ * The first matching operand is stored in the word itself.  Each further
+ * match appends a copy of the word marked `repeated' that carries the
+ * additional operand; the copy shares the original's text pointer.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* reserve room for the worst case: one extra entry per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* take the address only now: repalloc() above may have moved the array */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) {
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else
+               word->item=item;
+       }
+       item++;
+   }
+}
+
+/*
+ * Headline variant of parsetext_v2(): every token of buf is appended
+ * verbatim to prs->words (hladdword), then normalised with the dictionaries
+ * mapped to its type, and each resulting lexeme is matched against the
+ * query operands (hlfinditem).
+ *
+ * As in parsetext_v2(), the first dictionary returning a non-NULL array
+ * ends the search for that token.  The lexemes and the array holding them
+ * are freed here.  Raises ERROR for a token of MAXSTRLEN bytes or more.
+ */
+void
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len )
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)  /* a length, not a pointer */
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from the words collected in prs.
+ *
+ * A word is emitted when it is flagged `in' and neither `skip' nor
+ * `repeated'.  A word flagged `replace' is emitted as one blank; otherwise
+ * its text is copied, wrapped in prs->startsel / prs->stopsel when it is
+ * flagged `selected'.  The text of every non-repeated word is pfree'd on
+ * the way (repeated entries share their text with another entry).
+ *
+ * Returns a palloc'ed text value.
+ */
+text*
+genhl(HLPRSTEXT * prs) {
+   int     alloclen=128;
+   text    *out=(text*)palloc( alloclen );
+   char    *ptr=((char*)out) + VARHDRSZ;
+   int     i;
+
+   for(i=0; i<prs->curwords; i++) {
+       HLWORD  *wrd=&(prs->words[i]);
+
+       /* make room for the word plus both selection markers */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= alloclen ) {
+           int used = ptr - ((char*)out);
+
+           alloclen *= 2;
+           out = (text *) repalloc(out, alloclen);
+           ptr = ((char*)out) + used;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace )
+               *ptr++ = ' ';
+           else {
+               if (wrd->selected) {
+                   memcpy(ptr,prs->startsel,prs->startsellen);
+                   ptr+=prs->startsellen;
+               }
+               memcpy(ptr,wrd->word,wrd->len);
+               ptr+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(ptr,prs->stopsel,prs->stopsellen);
+                   ptr+=prs->stopsellen;
+               }
+           }
+       }
+
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+   }
+
+   VARATT_SIZEP(out)=ptr - ((char*)out);
+   return out;
+}
+
+/*
+ * Return the id of the current configuration.  If none has been chosen yet
+ * (current_cfg_id is 0), look up in pg_ts_cfg the configuration whose
+ * locale equals the server's LC_CTYPE setting and remember it.  Raises
+ * ERROR when no configuration matches.
+ */
+int
+get_currcfg(void) {
+   Oid     argtypes[1]={ TEXTOID };
+   Datum   values[1];
+   const char *locname;
+   bool    isnull;
+   int     rc;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( plan_getcfg_bylocale == NULL ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( plan_getcfg_bylocale == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   locname = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)locname) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed <= 0 )
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current one.
+ * findcfg() is called first, so the configuration is loaded (and any error
+ * raised) before current_cfg_id changes.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current
+ * one.  The name is resolved with name2id_cfg() and handed to set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text    *name=PG_GETARG_TEXT_P(0);
+   Oid     cfgid=name2id_cfg(name);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum(cfgid) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable: return the oid of the current configuration, as determined
+ * by get_currcfg().
+ */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   int cfgid=get_currcfg();
+
+   PG_RETURN_OID( cfgid );
+}
+
+/*
+ * SQL-callable: report "TSearch cache cleaned" at NOTICE level.
+ *
+ * NOTE(review): nothing is freed in this function itself; presumably
+ * ts_error() is what resets the caches before reporting -- confirm against
+ * its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS)
+{
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID();
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+typedef struct {           /* dictionaries mapped to one token type */
+   int len;                /* number of entries in dict_id */
+   Datum   *dict_id;       /* malloc'ed array; holds dictionary oids once init_cfg() has finished */
+} ListDictionary;
+
+typedef struct {           /* one loaded configuration, filled by init_cfg() */
+   Oid id;                 /* oid of the pg_ts_cfg row */
+   Oid prs_id;             /* parser id, as returned by name2id_prs() */
+   int len;                /* number of entries in map (highest token id + 1) */
+   ListDictionary  *map;   /* malloc'ed array indexed by token type */
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+typedef struct {                   /* one lexeme produced by parsetext_v2() */
+        uint16          len;        /* length of word in bytes */
+   union {
+       uint16      pos;            /* position of the lexeme, set from LIMITPOS(prs->pos) */
+       uint16      *apos;          /* NOTE(review): presumably an array of positions used after merging -- confirm */
+   } pos;
+        char       *word;           /* lexeme text as returned by the dictionary */
+   uint32  alen;                   /* set to 0 by parsetext_v2(); presumably the allocated size of apos -- confirm */
+}       WORD;
+   
+typedef struct {                   /* output of parsetext_v2() */
+        WORD       *words;          /* palloc'ed array, grown with repalloc */
+        int4            lenwords;   /* number of entries allocated in words */
+        int4            curwords;   /* number of entries in use */
+   int4        pos;                /* running counter, incremented once per recognised token */
+}       PRSTEXT;
+
+typedef struct {                   /* one token of the text being headlined */
+        uint16    len;              /* length of word in bytes */
+   uint8    selected:1,            /* genhl(): wrap the word in startsel/stopsel */
+         in:1,                     /* genhl(): word is part of the headline */
+         skip:1,                   /* genhl(): do not emit the word */
+         replace:1,                /* genhl(): emit one blank instead of the word */
+         repeated:1;               /* extra copy made by hlfinditem(); shares word with the original */
+   uint8   type;                   /* token type reported by the parser */
+        char      *word;            /* palloc'ed copy of the token text (not NUL-terminated) */
+   ITEM      *item;                /* matching query operand, or NULL */
+}       HLWORD;
+   
+typedef struct {                   /* state shared by hlparsetext() and genhl() */
+        HLWORD       *words;        /* palloc'ed array, grown with repalloc */
+        int4            lenwords;   /* number of entries allocated in words */
+        int4            curwords;   /* number of entries in use */
+        char           *startsel;   /* text written before a selected word */
+        char            *stopsel;   /* text written after a selected word */
+        int2            startsellen;    /* length of startsel in bytes */
+        int2            stopsellen; /* length of stopsel in bytes */
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+
+/*
+ * Input function for the tsstat type.  The argument is never read: each
+ * call returns a freshly palloc'ed accumulator that consists of the
+ * header only (len = STATHDRSIZE) and holds zero entries.
+ */
+Datum
+tsstat_in(PG_FUNCTION_ARGS)
+{
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+
+/*
+ * Output function for the tsstat type.  The type has no text form, so
+ * every call reports ERROR "Unimplemented".
+ */
+Datum
+tsstat_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Unimplemented");
+
+   /* only reached if elog() returns; hands back SQL NULL in that case */
+   PG_RETURN_NULL();
+}
+
+/*
+ * Grow (or create) an array of WordEntry pointers.
+ *
+ * With no array yet (in is NULL or *len is 0) a new array of 8 slots is
+ * allocated; otherwise the capacity in *len is doubled and the array is
+ * repalloc'ed.  *len is updated to the new capacity and the (possibly
+ * moved) array is returned.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len )
+{
+   if ( in!=NULL && *len!=0 ) {
+       /* existing array: double it */
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*) * (*len) );
+   }
+
+   /* first use: start with room for eight pointers */
+   *len = 8;
+   return palloc( sizeof(WordEntry*) * (*len) );
+}
+
+/*
+ * Three-way comparison of a stored statistics word with a tsvector word.
+ * A shorter word sorts before a longer one; words of equal length are
+ * ordered by strncmp() over their bytes.  'stat' and 'txt' supply the
+ * string areas that a->pos and b->pos are offsets into.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt)
+{
+   const char *lhs;
+   const char *rhs;
+
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   lhs = STATSTRPTR(stat) + a->pos;
+   rhs = STRPTR(txt) + b->pos;
+   return strncmp(lhs, rhs, a->len);
+}
+
+/*
+ * Build a new tsstat that contains every entry of 'stat' plus the 'len'
+ * words of 'txt' referenced by entry[0..len-1].
+ *
+ * The caller (ts_accum) only passes words that are not present in 'stat'
+ * yet.  New entries are merged into the entry array in compareStatWord()
+ * order; this assumes that both the existing entries and 'entry' are
+ * already sorted that way.  Each added word starts with ndoc=1 and
+ * nentry=POSDATALEN(txt,word), or 1 when that is 0.
+ *
+ * The result is palloc'ed in the current memory context; 'stat' itself is
+ * neither modified nor freed.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* string bytes needed for the words being added */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings keep their offsets; new strings are appended behind them */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /*
+        * Single new word: binary-search its slot, then copy the entries
+        * before it, write the new entry, and copy the entries after it.
+        * The entry array ends where the string area starts.
+        */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: linear merge of the two sorted sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /*
+        * At most one of the two inputs still has items left: copy the
+        * remaining old entries (possibly none) ...
+        */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* ... or append the remaining new words */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+/*
+ * ts_accum(tsstat, tsvector): fold the words of one tsvector into the
+ * running statistics.
+ *
+ * For a word that is already in the statistics, ndoc is incremented and
+ * nentry grows by POSDATALEN(txt,word) (counted as 1 when that is 0);
+ * these updates are made in place in the passed-in 'stat'.  Words not
+ * seen before are collected in 'newentry' and added by formstat(), which
+ * returns a newly allocated tsstat.
+ *
+ * Returns 'stat' itself when no new word was found (or when the tsvector
+ * is missing/empty), otherwise the new tsstat; the old 'stat' is not
+ * freed here, so the caller has to do that if it wants to.
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both sequences in parallel while the statistics
+    * hold fewer than 100 entries per tsvector word, otherwise binary-search
+    * each tsvector word in the statistics.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word sorts before the current entry: it is new */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* statistics exhausted: every remaining word is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           /* the entry array ends where the string area begins */
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* a hit leaves the loop via break with StopLow < StopHigh */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Iteration state of the set-returning functions in this file, stored in
+ * funcctx->user_fctx: 'cur' is the index of the next entry to return and
+ * 'stat' a private copy of the statistics being walked.
+ * NOTE(review): 'stat' is declared as tsvector* but ts_setup_firstcall()
+ * stores a copy of a tsstat in it; this only works as long as both
+ * structs begin with the same len/size fields -- confirm, or consider
+ * declaring it as tsstat*.
+ */
+typedef struct {
+   uint32  cur;
+   tsvector *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions in this file.
+ *
+ * Working inside funcctx->multi_call_memory_ctx, it stores a StatStorage
+ * (cursor at 0, private copy of 'stat') in funcctx->user_fctx and builds
+ * the result slot and attinmeta from the tuple descriptor named
+ * "statinfo".  The previous memory context is restored before returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat)
+{
+   MemoryContext   callerctx;
+   StatStorage    *storage;
+   TupleDesc       desc;
+
+   callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* iteration state with its own copy of the statistics */
+   storage = palloc( sizeof(StatStorage) );
+   storage->cur = 0;
+   storage->stat = palloc( stat->len );
+   memcpy(storage->stat, stat, stat->len);
+   funcctx->user_fctx = (void*) storage;
+
+   /* what is needed to turn C strings into statinfo tuples */
+   desc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(callerctx);
+}
+
+
+/*
+ * Produce the next result row of a statistics scan.
+ *
+ * While entries remain, builds a (word, ndoc, nentry) tuple for the entry
+ * at the cursor, advances the cursor and returns the tuple as a Datum.
+ * Once all entries were returned, frees the StatStorage together with its
+ * statistics copy and returns (Datum)0 as the end marker.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx)
+{
+   StatStorage *storage = (StatStorage*) funcctx->user_fctx;
+   StatEntry   *item;
+   HeapTuple    tuple;
+   Datum        result;
+   char        *columns[3];
+   char         ndocbuf[16];
+   char         nentrybuf[16];
+
+   if ( !( storage->cur < storage->stat->size ) ) {
+       /* nothing left: release the scan state */
+       pfree(storage->stat);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   item = STATPTR(storage->stat) + storage->cur;
+
+   /* the counters are handed over as text */
+   columns[1] = ndocbuf;
+   sprintf(ndocbuf, "%d", item->ndoc);
+   columns[2] = nentrybuf;
+   sprintf(nentrybuf, "%d", item->nentry);
+
+   /* the word is not NUL-terminated in the string area, so copy it out */
+   columns[0] = palloc( item->len+1 );
+   memcpy( columns[0], STATSTRPTR(storage->stat)+item->pos, item->len);
+   columns[0][item->len] = '\0';
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, columns);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(columns[0]);
+   storage->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function over an accumulated tsstat (argument 0).
+ * The first call copies the statistics into the multi-call state; every
+ * call then returns one row from ts_process_call() until it signals the
+ * end with (Datum)0.
+ */
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS)
+{
+   FuncCallContext *funcctx;
+   Datum            row;
+
+   if ( SRF_IS_FIRSTCALL() ) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = ts_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+/* OID of the tsvector type; InvalidOid until get_ti_Oid() has filled it in */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the OID of the type named 'tsvector' in pg_type through SPI and
+ * store it in tiOid.  Raises ERROR when the query fails, returns no row,
+ * or yields InvalidOid.  Uses SPI_exec(), so the caller must already be
+ * connected to SPI.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is an unsigned row count, so "no row" has to be tested
+    * as < 1; a "< 0" test can never fire and would let vals[0] below be
+    * read from an empty result.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL query contained in 'txt' through an SPI cursor and
+ * accumulate word statistics over its result.
+ *
+ * The query has to return exactly one column of type tsvector, otherwise
+ * ERROR is raised.  Rows are fetched 100 at a time and each non-NULL
+ * value is folded into the accumulator with ts_accum(); an accumulator
+ * replaced by ts_accum() is freed here.  The caller must already be
+ * connected to SPI.
+ *
+ * Returns the accumulated tsstat (header only if no row contributed).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from an empty accumulator */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   /* one iteration per fetched batch; an empty batch ends the scan */
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum() hands back a new struct when words were added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: argument 0 is the text of an SQL query.
+ * On the first call the query is run via ts_stat_sql() between
+ * SPI_connect() and SPI_finish(), and the collected statistics are copied
+ * into the multi-call state before SPI is shut down.  Every call then
+ * returns one row from ts_process_call() until it signals the end with
+ * (Datum)0.
+ */
+Datum 
+ts_stat(PG_FUNCTION_ARGS)
+{
+   FuncCallContext *funcctx;
+   Datum            row;
+
+   if ( SRF_IS_FIRSTCALL() ) {
+       tsstat *collected;
+       text   *sql = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       collected = ts_stat_sql(sql);
+       PG_FREE_IF_COPY(sql,0); 
+       ts_setup_firstcall(funcctx, collected );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = ts_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one word.
+ *   len    - length of the word in bytes
+ *   pos    - offset of the word inside the string area (see STATSTRPTR)
+ *   ndoc   - incremented once for every tsvector containing the word
+ *   nentry - sum of the word's per-tsvector position counts, where a
+ *            tsvector without position data counts as 1
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Accumulated word statistics, kept in a single allocation:
+ *   len  - total size of the allocation in bytes
+ *   size - number of StatEntry items
+ *   data - 'size' StatEntry items followed by the bytes of all words
+ *          (the macros below locate both parts)
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat: a header of two int4 fields, then 'size'
+ * StatEntry items, then the string area holding the words.
+ *   STATHDRSIZE        - size of the len/size header
+ *   CALCSTATSIZE(x,l)  - bytes needed for x entries plus l string bytes
+ *   STATPTR(x)         - first StatEntry
+ *   STATSTRPTR(x)      - start of the string area (= end of the entries)
+ *   STATSTRSIZE(x)     - number of bytes in the string area
+ * Arguments are parenthesized so that expressions can be passed safely,
+ * and the header fields are read through tsstat, the type declared in
+ * this header.
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, plus an array giving each lexeme's position in that string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>         /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending position.
+ *
+ * Returns 0 for equal positions: qsort() requires a consistent comparator,
+ * and the previous "return 1" made cmp(a,b) and cmp(b,a) both positive.
+ */
+static int
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ *
+ * When two items share a position the larger weight is kept.  Compaction
+ * stops early once MAXNUMPOS-1 is reached as an index or a kept position
+ * equals MAXENTRYPOS-1.  Returns the number of items kept.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   int4 last = 0;
+   int4 i;
+
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (i = 1; i < l; i++) {
+       if ( a[i].pos == a[last].pos ) {
+           /* same position: only the heavier weight survives */
+           if ( a[i].weight > a[last].weight )
+               a[last].weight = a[i].weight;
+           continue;
+       }
+
+       last++;
+       a[last].pos = a[i].pos;
+       a[last].weight = a[i].weight;
+       if ( last >= MAXNUMPOS-1 || a[last].pos == MAXENTRYPOS-1 )
+           break;
+   }
+   return last + 1;
+}
+
+/* Text buffer that compareentry() resolves entry offsets against; uniqueentry() sets it before sorting. */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN items: shorter lexemes sort first,
+ * lexemes of equal length are ordered by strncmp() over their bytes in
+ * BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *lhs = (const WordEntryIN *) a;
+   const WordEntryIN *rhs = (const WordEntryIN *) b;
+
+   if ( lhs->entry.len != rhs->entry.len )
+       return ( lhs->entry.len > rhs->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[lhs->entry.pos],
+                  &BufferStr[rhs->entry.pos],
+                  lhs->entry.len);
+}
+
+/*
+ * Sort the l parsed entries in a[] and merge duplicates in place.
+ *
+ * a         - entries; entry.pos/entry.len address the lexeme inside buf,
+ *             and, when entry.haspos is set, pos[] holds the position list
+ *             with its element count stored as a uint16 in slot 0
+ * l         - number of entries (callers pass l >= 1)
+ * buf       - text buffer the entries point into
+ * outbuflen - OUT: bytes needed to store every surviving lexeme
+ *             (SHORTALIGNed) plus, for entries with positions, the count
+ *             slot and the position list
+ *
+ * Returns the number of distinct entries left at the front of a[].
+ *
+ * *outbuflen is computed from scratch here.  It used to be accumulated on
+ * top of whatever the caller passed in, and the multi-entry path forgot the
+ * count slot, so the result could be smaller than what tsvector_in() writes.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   res = a;
+   *outbuflen = 0;
+   if (l == 1) {
+       *outbuflen = SHORTALIGN(res->entry.len);
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen += (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
+       }
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* new lexeme: finish accounting for *res, then start the next slot */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               /* +1 for the uint16 count slot stored ahead of the positions */
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* duplicate lexeme carrying positions: fold them into *res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* *res had no positions yet: take over ptr's list */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* account for the last surviving entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/* States of the gettoken_tsvector() scanner */
+#define WAITWORD   1       /* looking for the start of a lexeme */
+#define WAITENDWORD 2      /* inside an unquoted lexeme */
+#define WAITNEXTCHAR   3   /* previous char was a backslash; take next one literally */
+#define WAITENDCMPLX   4   /* inside a single-quoted lexeme */
+#define WAITPOSINFO    5   /* after a quoted lexeme; ':' starts position info */
+#define INPOSINFO  6       /* expecting the first digit of a position */
+#define WAITPOSDELIM   7   /* after a position: more digits, weight letter, ',' or end */
+
+/*
+ * Double state->word when fewer than two free bytes remain past
+ * state->curpos, keeping curpos at the same offset in the new buffer.
+ * Expects a TI_IN_STATE pointer named "state" in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Scan the next lexeme, and its optional position list, from state->prsbuf.
+ *
+ * On success returns 1: state->word holds the NUL-terminated lexeme,
+ * state->curpos points at its terminator, and state->alen is non-zero when
+ * positions were parsed (state->pos then holds the list, with the element
+ * count stored as a uint16 in slot 0).  Returns 0 when the input is
+ * exhausted.  Malformed input is reported with elog(ERROR).
+ *
+ * When state->oprisdelim is set, ISOPERATOR characters end a lexeme and a
+ * ':' ends it without any position info being parsed.
+ *
+ * The <ctype.h> calls cast their argument to unsigned char: passing a
+ * negative plain char (any high-bit byte) to them is undefined behaviour.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;   /* state to resume after an escaped char */
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               /* end of lexeme; prsbuf is left on the delimiter for the next call */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* step over the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   /* first position: allocate the list, slot 0 is the count */
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi() reads the whole number here; WAITPOSDELIM skips its remaining digits */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function for the tsvector type.
+ *
+ * Tokenizes the cstring argument with gettoken_tsvector(), collects the
+ * lexemes in a scratch buffer, sorts and de-duplicates them with
+ * uniqueentry(), and builds the final value: header, WordEntry array, then
+ * each lexeme (SHORTALIGNed) followed by its position data when present.
+ *
+ * Raises an error for a lexeme of MAXSTRLEN bytes or more, and for any
+ * offset that does not fit the 20-bit WordEntry.pos field.  Valid offsets
+ * are 0 .. MAXSTRPOS-1, so the tests use ">=", and the offsets into the
+ * final value (which include position data) are checked as well.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array and the scratch text buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           /* the position list allocated by the tokenizer now belongs to the entry */
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /* the final offset must fit WordEntry.pos too */
+       if (cur - STRPTR(in) >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* count slot plus positions, copied as one run */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * SQL-callable: return the "size" field (number of entries) of a tsvector.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before the detoasted copy may be released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function for the tsvector type.
+ *
+ * Each entry is printed as a single-quoted lexeme, optionally followed by
+ * ':' and a comma-separated position list; a position is followed by A, B
+ * or C for weights 3, 2 and 1 (weight 0 prints no letter).  Entries are
+ * separated by one space.
+ *
+ * Both the quote and the backslash are backslash-escaped, because
+ * gettoken_tsvector() treats a backslash as an escape character on input;
+ * without escaping it, a lexeme containing a backslash did not survive an
+ * output/input round trip.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   /*
+    * Worst-case size: two quotes per entry, separating spaces, terminator,
+    * every lexeme byte escaped (len*2), and 7 bytes per position (up to
+    * five digits, weight letter, comma; the ':' takes the slot of the
+    * comma the last position does not print).
+    */
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += ptr[i].len*2 /*for escape */;
+       if ( ptr[i].haspos )
+           lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           /* the escape byte is already budgeted by the len*2 term above */
+           if (*curin == '\'' || *curin == '\\')
+               *curout++ = '\\';
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD items: order by length, then by strncmp()
+ * of the word bytes, then by position.
+ *
+ * Returns 0 when word and position are both equal; the comparator used to
+ * return -1 for that case in either argument order, which is inconsistent.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+
+   if (wa->len == wb->len) {
+       int res = strncmp(wa->word, wb->word, wb->len);
+
+       if ( res==0 ) {
+           if ( wa->pos.pos == wb->pos.pos )
+               return 0;
+           return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+       }
+       return res;
+   }
+   return (wa->len > wb->len) ? 1 : -1;
+}
+
+/*
+ * Sort the l parsed words in a[] and merge duplicates in place.
+ *
+ * Every surviving word gets a freshly allocated pos.apos[] array whose
+ * slot 0 holds the number of positions and slots 1.. hold the positions
+ * (clamped with LIMITPOS).  Positions of duplicates are appended until
+ * MAXNUMPOS-1 entries are stored or the last stored position equals
+ * MAXENTRYPOS-1.  The word string of each duplicate is pfree'd.
+ *
+ * Returns the number of distinct words left at the front of a[].
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *last = a;
+   int4        i;
+   int tmppos;
+
+   /* a single word needs no sorting */
+   if (l != 1)
+       qsort((void *) a, l, sizeof(WORD), compareWORD);
+
+   /*
+    * Read pos.pos before pos.apos is assigned (presumably the two share
+    * storage -- TODO confirm against the WORD declaration).
+    */
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   for (i = 1; i < l; i++)
+   {
+       WORD       *cur = a + i;
+
+       if (cur->len == last->len &&
+           strncmp(cur->word, last->word, last->len) == 0)
+       {
+           /* duplicate: drop its string, keep its position if room remains */
+           pfree(cur->word);
+           if ( last->pos.apos[0] >= MAXNUMPOS-1 || last->pos.apos[ last->pos.apos[0] ] == MAXENTRYPOS-1 )
+               continue;
+           if ( last->pos.apos[0]+1 >= last->alen ) {
+               last->alen*=2;
+               last->pos.apos=(uint16*)repalloc( last->pos.apos, sizeof(uint16)*last->alen );
+           }
+           last->pos.apos[ last->pos.apos[0]+1 ] = LIMITPOS(cur->pos.pos);
+           last->pos.apos[0]++; 
+           continue;
+       }
+
+       /* new word: move it into the next output slot */
+       last++;
+       last->len = cur->len;
+       last->word = cur->word;
+       tmppos=LIMITPOS(cur->pos.pos);
+       last->alen=2;
+       last->pos.apos=(uint16*)palloc( sizeof(uint16)*last->alen );
+       last->pos.apos[0]=1;
+       last->pos.apos[1]=tmppos;
+   }
+
+   return last + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ */
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are sorted and de-duplicated with uniqueWORD(), then written
+ * out as WordEntry array plus lexeme bytes (SHORTALIGNed), each followed by
+ * a uint16 position count and the positions, all with weight 0.
+ *
+ * Consumes prs: every word string, every position array and prs->words
+ * itself are pfree'd.  Raises an error when a lexeme offset would not fit
+ * the 20-bit WordEntry.pos field (valid offsets are 0 .. MAXSTRPOS-1,
+ * hence the ">=" test).
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+   /* first pass: bytes needed for lexemes and position data */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   /* second pass: fill the entry array and the data area */
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* apos[0] is the count, apos[1..] the positions */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * SQL-callable: to_tsvector(cfg id, text).
+ *
+ * Parses the text with parsetext_v2() under the configuration looked up by
+ * findcfg() and returns the resulting tsvector; a text yielding no words
+ * gives an empty tsvector (size 0).
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *txt = PG_GETARG_TEXT_P(1);
+   PRSTEXT     prs;
+   tsvector       *result = NULL;
+   TSCfgInfo *cfg=findcfg(PG_GETARG_INT32(0)); 
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+   
+   parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+   PG_FREE_IF_COPY(txt, 1);
+
+   if (prs.curwords == 0) {
+       /* nothing parsed: return a header-only value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   } else
+       result = makevalue(&prs);
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * SQL-callable: to_tsvector(cfg name, text).  Resolves the configuration
+ * name with name2id_cfg() and delegates to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname=PG_GETARG_TEXT_P(0);
+   Datum cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);   
+}
+
+/*
+ * SQL-callable: to_tsvector(text).  Uses the configuration returned by
+ * get_currcfg() and delegates to to_tsvector().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum cfgid = Int32GetDatum( get_currcfg() );
+   Datum result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(result);   
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * TEXTOID.  Returns the OID of the first such candidate, or InvalidOid
+ * when there is none.  The whole candidate list is pfree'd.
+ */
+static Oid
+findFunc(char *fname) {
+   List *names=makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(names, 1);
+   Oid funcid = InvalidOid;
+
+   freeList(names);
+
+   while (cand != NULL) {
+       FuncCandidateList next = cand->next;
+
+       /* only the first match is kept; later nodes are still freed */
+       if ( funcid == InvalidOid && cand->args[0] == TEXTOID )
+           funcid=cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return funcid;
+}
+
+/*
+ * Trigger
+ *
+ * Row-level BEFORE INSERT/UPDATE trigger that fills a tsvector column.
+ * Trigger arguments: the first names the tsvector column; each following
+ * argument is either a character column (text, varchar or char) of the
+ * relation or, when no such column exists, the name of a one-argument text
+ * function found via findFunc().  Once a function name has been seen,
+ * funcoid stays set and that function is applied to the value of every
+ * column listed after it.
+ *
+ * The text of all listed columns is parsed with the current configuration
+ * into one word list; the resulting tsvector (empty when no words were
+ * found) replaces the tsvector column in the returned tuple.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* attribute number of the tsvector column to fill */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /* no detoast copy was made: make the pfree() test below a no-op */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when it differs from txt_toasted */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a header-only (empty) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Descriptor of one lexeme in a tsvector, packed into 32 bits:
+ * a "has position data" flag, the lexeme length and its offset.
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* One more than the largest value the len / pos bit-fields can hold */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/* One lexeme position packed into 16 bits: 2-bit weight, 14-bit position */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+/* One more than the largest value the 14-bit pos field can hold */
+#define MAXENTRYPOS    (1<<14)
+/* Bound used by the position-collecting code on positions kept per lexeme */
+#define MAXNUMPOS  256
+/* Clamp x to the largest storable position, MAXENTRYPOS-1 (evaluates x twice) */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * The tsvector value.  data[] holds "size" WordEntry items followed by the
+ * lexeme bytes and position data (see ARRPTR / STRPTR).
+ */
+typedef struct
+{
+   int4        len;    /* total size of the value in bytes */
+   int4        size;   /* number of WordEntry items */
+   char        data[1];
+}  tsvector;
+
+/*
+ * Layout helpers for a tsvector value.  Macro arguments are parenthesized
+ * so that expression arguments (e.g. "a + b") expand correctly.
+ *
+ * DATAHDRSIZE        size of the len/size header
+ * CALCDATASIZE(x,l)  total bytes for x entries plus l bytes of data
+ * ARRPTR(x)          start of the WordEntry array
+ * STRPTR(x)          start of the data area following the array
+ * STRSIZE(x)         size of that data area
+ * _POSDATAPTR(x,e)   address of entry e's uint16 position count, located
+ *                    right after its SHORTALIGNed lexeme
+ * POSDATALEN(x,e)    number of positions of entry e (0 without haspos)
+ * POSDATAPTR(x,e)    first WordEntryPos of entry e
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * A WordEntry plus its separately allocated position list, used while a
+ * tsvector is being parsed from text.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;   /* slot 0 holds the count as uint16; positions follow */
+}  WordEntryIN;
+
+/* Scanner state shared between gettoken_tsvector() and its callers */
+typedef struct
+{
+   char       *prsbuf;     /* current read position in the input string */
+   char       *word;       /* buffer receiving the current lexeme */
+   char       *curpos;     /* write position inside word */
+   int4        len;        /* allocated size of word */
+   int4        state;      /* current scanner state */
+   int4        alen;       /* allocated slots in pos; 0 when no positions were parsed */
+   WordEntryPos    *pos;   /* position list of the current lexeme */
+   bool        oprisdelim; /* true: operator characters terminate a lexeme */
+}  TI_IN_STATE;
+
+/* Scan the next lexeme; returns 1 when one was read, 0 at end of input */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>         /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * SQL-callable: return a copy of the tsvector with all position data
+ * removed.  Lexemes are copied in their existing order and every entry of
+ * the result has haspos = 0.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* space needed for the lexemes alone */
+   for(i=0;i<in->size;i++) 
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * SQL-callable: setweight(tsvector, "char").
+ *
+ * Returns a copy of the tsvector in which every position carries the given
+ * weight: 'a' -> 3, 'b' -> 2, 'c' -> 1, 'd' -> 0 (case-insensitive).  Any
+ * other character raises "Unknown weight".
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* cast: tolower() is undefined for negative plain-char values */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;    
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two lexemes that live in (possibly different) data areas:
+ * shorter sorts first, equal lengths are ordered by strncmp().
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position list of
+ * destptr (an entry of dest), adding maxpos to every position and clamping
+ * the result with LIMITPOS.
+ *
+ * If destptr has no position data yet its count is initialised to 0; the
+ * haspos flag is set once at least one position has been appended.
+ * Appending stops when the source is exhausted, when the destination holds
+ * MAXNUMPOS positions, or when the last stored position is already
+ * MAXENTRYPOS-1.
+ *
+ * Returns the number of positions appended.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+
+/*
+ * SQL-callable: concatenate two tsvectors.
+ *
+ * The entry arrays of both inputs are merged in compareEntry() order.
+ * Positions taken from the second argument are shifted by the largest
+ * position found in the first one (see add_pos()); a lexeme present in
+ * both inputs gets the positions of the first followed by the shifted
+ * positions of the second.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1; it becomes the shift for in2 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* allocate for the worst case: no lexeme shared between the inputs */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   /* merge step while both inputs still have entries */
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* copy count and positions of in1 unchanged */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* shifted copy; drop the flag if nothing could be added */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one output entry, positions combined */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count slot is already accounted for, so add positions only */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* remaining entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* remaining entries of in2, positions shifted */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Shrink to the real entry count.  STRPTR() depends on out->size, so
+    * the data area written at the old location is moved down to follow
+    * the shorter entry array.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- Uninstall script for the tsearch2 module.
+--
+-- WARNING: because of the CASCADE clauses below, this script also drops
+-- every index, trigger and table column that uses a type defined in
+-- tsearch2.sql.  All statements run between the BEGIN and END of a single
+-- transaction.
+
+
+-- GiST operator class
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregates
+DROP AGGREGATE stat(tsvector);
+
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types (CASCADE also removes objects that depend on them)
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- functions that are dropped explicitly
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the token id
+ * constants of deflex.h (LATWORD == 1 ... HTMLENTITY == 23).
+ * Index 0 is an empty placeholder because token ids start at 1.
+ * Must be kept in step with tok_alias[] and with LASTNUM.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token id constants of
+ * deflex.h (LATWORD == 1 ... HTMLENTITY == 23).
+ * Index 0 is an empty placeholder because token ids start at 1.
+ * Must be kept in step with lex_descr[] and with LASTNUM.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Remember: LASTNUM must always equal the highest token id defined below.
+ * When a token type is added, update LASTNUM and append matching entries
+ * to lex_descr[] and tok_alias[], which are indexed by these ids.
+ */
+#define LASTNUM        23
+
+/* token ids returned by the scanner; 0 is not used as a token id here */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* per-token-id description and alias tables */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+#include <stdio.h>     /* FILE, needed by the start_parse_fh() prototype */
+
+/*
+ * Interface to the flex-generated scanner built from parser.l.
+ */
+
+/*
+ * Text of the token most recently returned by tsearch2_yylex().
+ * Declared extern: the one definition lives in parser.l, so including this
+ * header from several files no longer creates duplicate definitions.
+ */
+extern char   *token;
+/*
+ * Length in bytes of that token.
+ * NOTE(review): this is still a tentative definition rather than a
+ * declaration.  No initialized definition of tokenlen is visible in
+ * parser.l, so turning this into "extern" requires adding one there too.
+ */
+int            tokenlen;
+/* return the id of the next token (see deflex.h); sets token and tokenlen */
+int            tsearch2_yylex(void);
+/* start scanning the first "limit" bytes of a string */
+void       start_parse_str(char *, int);
+/* start scanning a file handle; limit 0 means no read limit */
+void       start_parse_fh(FILE *, int);
+/* release the scanner state created by start_parse_*() */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: every fprintf() in the generated
+ * scanner is redirected to ts_error(ERROR, ...); the file argument is dropped.
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* route the scanner's memory management through the postgres allocator */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the current token */
+char *s     = NULL;  /* private copy used to return a WHOLE compound token (hyphenated word, URL) */
+
+YY_BUFFER_STATE buf = NULL; /* buffer being scanned; needed for parsing from a string */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the file handle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next read from the file handle */
+
+/*
+ * Redefine the input macro so reads can be limited to lrlimit bytes.
+ *
+ * Interactive buffers are read one character at a time, up to and including
+ * a newline.  Otherwise: lrlimit == 0 reports end of input; lrlimit > 0 reads
+ * at most lrlimit bytes and decreases lrlimit accordingly; a negative lrlimit
+ * reads max_size bytes.
+ *
+ * Note that the inner "else" covers only "bytestoread = max_size;".  Despite
+ * its indentation, the fread() that follows runs in both the limited and the
+ * unlimited case.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* exclusive scanner state used while splitting a hyphenated word into its parts */
+%x DELIM  
+/* exclusive scanner states used while parsing a URL */
+%x URL  
+%x SERVER  
+
+/* exclusive scanner states used while parsing HTML tags, quoted tag text, comments and scripts */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* bytes with the high bit set (octal 200-377) are treated as cyrillic koi8 chars */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: drop the saved copy of a compound token, if any,
+ * and delete the current scanner buffer.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Begin scanning the first "limit" bytes of "str".  Any parse that is still
+ * open is closed first; the scanner starts in its INITIAL state.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL )
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Begin scanning from a file handle.  A non-zero "limit" becomes the read
+ * limit (lrlimit); zero selects unlimited reading (-1).  Any parse that is
+ * still open is closed first; the scanner starts in its INITIAL state.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL )
+       end_parse();
+
+   if ( limit != 0 )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+/* saved SPI plan for the pg_ts_parser lookup in init_prs(); prepared on first use */
+static void *plan_getparser=NULL;
+/* oid of the parser used by the *_current functions; InvalidOid until first set */
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser whose pg_ts_parser row has
+ * oid "id".
+ *
+ * *prs is zeroed first.  The start, nexttoken, end and headline functions
+ * named in the row are resolved with fmgr_info_cxt() in TopMemoryContext;
+ * the lextype function is stored as a plain oid.  Reports an error through
+ * ts_error(ERROR, ...) when the plan cannot be prepared, the query fails or
+ * no such row exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       HeapTuple   row = SPI_tuptable->vals[0];
+       TupleDesc   rowdesc = SPI_tuptable->tupdesc;
+       Oid         oid;
+
+       /* column 1: prs_start */
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       /* column 2: prs_nexttoken */
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       /* column 3: prs_end */
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       /* column 4: prs_lextype, kept as an oid and called by oid later */
+       prs->lextype=DatumGetObjectId( SPI_getbinval(row, rowdesc, 4, &isnull) );
+       /* column 5: prs_headline */
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned, so print it with %u rather than %d */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/*
+ * Cache of the parsers already loaded by findprs(), plus a name -> oid map
+ * filled by name2id_prs().
+ */
+typedef struct {
+   WParserInfo *last_prs;  /* entry returned by the latest lookup; checked first */
+   int     len;            /* number of entries of list[] in use */
+   int     reallen;        /* number of entries allocated for list[] */
+   WParserInfo *list;      /* loaded parsers, kept sorted by prs_id for bsearch() */
+   SNMap       name2id_map;    /* parser name -> parser oid */
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget every cached parser: release the name map and the parser array,
+ * then return PList to its all-zero initial state.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL )
+       free(PList.list);
+
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly.  Oid is unsigned, so returning the
+ * difference of two ids as an int yields the wrong sign whenever they
+ * differ by more than INT_MAX.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   return ( ida > idb ) ? 1 : 0;
+}
+
+/*
+ * Return the cached description of parser "id", loading it with init_prs()
+ * on first use.  The cache array is kept sorted by prs_id.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo *slot;
+
+   /* fast path: the same parser as in the previous lookup */
+   if ( PList.last_prs != NULL && PList.last_prs->prs_id == id )
+       return PList.last_prs;
+
+   /* search among the parsers loaded so far */
+   if ( PList.len != 0 ) {
+       WParserInfo probe;
+
+       probe.prs_id = id;
+       PList.last_prs = bsearch(&probe, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs )
+           return PList.last_prs;
+   }
+
+   /* not cached yet: grow the array when it is full (16 entries, then doubling) */
+   if ( PList.len == PList.reallen ) {
+       int newsize = ( PList.reallen == 0 ) ? 16 : 2*PList.reallen;
+       WParserInfo *grown = (WParserInfo*)realloc(PList.list, sizeof(WParserInfo)*newsize);
+
+       if ( grown == NULL )
+           ts_error(ERROR,"No memory");
+       PList.reallen = newsize;
+       PList.list = grown;
+   }
+
+   /* load the parser into the first free slot, then restore the sort order */
+   slot = &(PList.list[PList.len]);
+   PList.last_prs = slot;
+   init_prs(id, slot);
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+
+   /* qsort may have moved the new entry, so look it up again */
+   return findprs(id);
+}
+
+/* saved SPI plan for the name lookup in name2id_prs(); prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ * Results are remembered in PList.name2id_map; an unknown name is reported
+ * through ts_error(ERROR, ...).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid id;
+
+   /* a previously resolved name is answered from the map */
+   id = findSNMap_t( &(PList.name2id_map), name );
+   if ( id != InvalidOid )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed == 0 )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   SPI_finish();
+
+   /* remember the answer for later calls */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* per-call state of the token_type* set-returning functions */
+typedef struct {
+   int     cur;        /* index of the next list[] entry to return */
+   LexDescr    *list;  /* array from the parser's lextype function; ends at lexid == 0 */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type* functions: ask the lextype
+ * function of parser "prsid" for its token descriptions and prepare the
+ * "tokentype" result row, all in the multi-call memory context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo *parser = findprs(prsid);
+   MemoryContext prevctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   TypeStorage *storage = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   TupleDesc rowdesc;
+
+   storage->cur = 0;
+   storage->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) )
+   );
+   funcctx->user_fctx = (void*)storage;
+
+   rowdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(prevctx);
+}
+
+/*
+ * Produce the next "tokentype" row, or (Datum)0 once the list is exhausted.
+ * The alias and description strings of an entry are freed after its row is
+ * built; the list and the state itself are freed at the end.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *storage = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char        *fields[3];
+   char        idbuf[16];
+   HeapTuple   tuple;
+   Datum       row;
+
+   /* no list, or the lexid == 0 terminator reached: clean up and stop */
+   if ( storage->list == NULL || storage->list[storage->cur].lexid == 0 ) {
+       if ( storage->list )
+           pfree(storage->list);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   entry = &(storage->list[storage->cur]);
+
+   sprintf(idbuf, "%d", entry->lexid);
+   fields[0] = idbuf;
+   fields[1] = entry->alias;
+   fields[2] = entry->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   row = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(entry->alias);
+   pfree(entry->descr);
+   storage->cur++;
+   return row;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: one "tokentype" row per token type of the parser
+ * whose oid is argument 0.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if ( SRF_IS_FIRSTCALL() ) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: like token_type(), but the parser is selected by
+ * the name given as argument 0.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if ( SRF_IS_FIRSTCALL() ) {
+       text *prsname = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( prsname ) );
+       PG_FREE_IF_COPY(prsname,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: like token_type(), but for the current parser.
+ * When no current parser has been chosen yet, the one named "default" is
+ * looked up and becomes current.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if ( SRF_IS_FIRSTCALL() ) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid )
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+/*
+ * Make the parser whose oid is argument 0 the current parser.
+ * findprs() is called first so that the parser gets loaded (and an unknown
+ * oid gets reported) before current_parser_id is changed.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid prsid = PG_GETARG_OID(0);
+
+   findprs(prsid);
+   current_parser_id = prsid;
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+/*
+ * Make the parser named by argument 0 the current parser: the name is
+ * resolved to an oid and the work is delegated to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+   text *prsname = PG_GETARG_TEXT_P(0);
+   Datum prsid = ObjectIdGetDatum( name2id_prs(prsname) );
+
+   DirectFunctionCall1( set_curprs, prsid );
+   PG_FREE_IF_COPY(prsname, 0);
+   PG_RETURN_VOID();
+}
+
+/* one token produced by a parser: its type id and a private copy of its text */
+typedef struct {
+   int type;
+   char    *lexem; /* palloc'ed, NUL-terminated copy of the token text */
+} LexemEntry;
+
+/* per-call state of the parse* set-returning functions */
+typedef struct {
+   int cur;    /* while collecting: entries filled so far; afterwards: next entry to return */
+   int len;    /* while collecting: entries allocated; afterwards: number of entries */
+   LexemEntry  *list;
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse* functions: run parser "prsid" over
+ * the whole of "txt", collect every (type, lexeme) pair into a PrsStorage
+ * and prepare the "tokenout" result row, all in the multi-call memory
+ * context.
+ *
+ * prsid is an Oid, as in setup_firstcall(): every caller passes an Oid and
+ * findprs() expects one.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text payload (header excluded) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* fetch tokens until the parser returns type 0 */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* double the array when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* keep a NUL-terminated copy: lex points into the parser's own storage */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* switch from "collecting" to "returning": len = count, cur = next row */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Produce the next "tokenout" row, or (Datum)0 once every collected token
+ * has been returned.  A token's text is freed after its row is built; the
+ * list and the state itself are freed at the end.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *storage = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char        *fields[2];
+   char        typebuf[16];
+   HeapTuple   tuple;
+   Datum       row;
+
+   /* everything returned: clean up and stop */
+   if ( storage->cur >= storage->len ) {
+       if ( storage->list )
+           pfree(storage->list);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   entry = &(storage->list[storage->cur]);
+
+   sprintf(typebuf, "%d", entry->type);
+   fields[0] = typebuf;
+   fields[1] = entry->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   row = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(entry->lexem);
+   storage->cur++;
+   return row;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: split the text in argument 1 with the parser whose
+ * oid is argument 0 and return one "tokenout" row per token.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if ( SRF_IS_FIRSTCALL() ) {
+       text *input = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),input );
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: like parse(), but the parser is selected by the
+ * name given as argument 0; the text to split is argument 1.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if ( SRF_IS_FIRSTCALL() ) {
+       text *prsname = PG_GETARG_TEXT_P(0);
+       text *input = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( prsname ),input );
+       PG_FREE_IF_COPY(prsname,0);
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: like parse(), but with the current parser; the
+ * text to split is argument 0.  When no current parser has been chosen yet,
+ * the one named "default" is looked up and becomes current.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if ( SRF_IS_FIRSTCALL() ) {
+       text *input = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid )
+           current_parser_id = name2id_prs( char2text("default") );
+       prs_setup_firstcall(funcctx, current_parser_id,input );
+       PG_FREE_IF_COPY(input,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * Build a headline for a document.
+ *
+ * Arguments: 0 - oid of the configuration, 1 - document text, 2 - query,
+ * 3 - optional options text (treated as absent when missing or a NULL
+ * pointer).  The document is parsed with hlparsetext(), the headline
+ * function of the configuration's parser marks it up, and genhl() produces
+ * the resulting text.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /*
+    * prs was zeroed above and nothing in this function sets startsel or
+    * stopsel, so they stay NULL unless a callee filled them in.  Only
+    * free what was actually allocated.
+    */
+   if ( prs.startsel )
+       pfree(prs.startsel);
+   if ( prs.stopsel )
+       pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+/*
+ * headline() with the configuration given by name (argument 0).  Arguments
+ * 1 and 2 are passed through; argument 3 is passed through when present and
+ * replaced by a NULL pointer otherwise.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text   *cfgname = PG_GETARG_TEXT_P(0);
+   Datum   options;
+   Datum   result;
+
+   if ( PG_NARGS() > 3 )
+       options = PG_GETARG_DATUM(3);
+   else
+       options = PointerGetDatum(NULL);
+
+   result = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum( name2id_cfg( cfgname ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       options
+   );
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+/*
+ * headline() with the current configuration.  Arguments 0 and 1 are the
+ * document and the query; argument 2 is passed through as the options when
+ * present and replaced by a NULL pointer otherwise.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum   options;
+
+   if ( PG_NARGS() > 2 )
+       options = PG_GETARG_DATUM(2);
+   else
+       options = PointerGetDatum(NULL);
+
+   PG_RETURN_DATUM(DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       options
+   ));
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/*
+ * Cached description of one parser, filled by init_prs() from the
+ * pg_ts_parser row with oid prs_id.
+ */
+typedef struct {
+   Oid prs_id;
+   FmgrInfo start_info;        /* from column prs_start */
+   FmgrInfo getlexeme_info;    /* from column prs_nexttoken */
+   FmgrInfo end_info;          /* from column prs_end */
+   FmgrInfo headline_info;     /* from column prs_headline */
+   Oid lextype;                /* oid of the function in column prs_lextype */
+   void *prs;                  /* value returned by the start function; passed to the others */
+} WParserInfo;
+
+/* load the description of parser "id" into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached description of parser "id", loading it if needed */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* drop all cached parser descriptions */
+void   reset_prs(void);
+
+
+/* one entry of the array returned by a parser's lextype function; lexid 0 ends the array */
+typedef struct {
+   int lexid;
+   char    *alias;
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_lextype - describe every token type of the default parser.
+ *
+ * Returns a palloc'd array of LASTNUM+1 LexDescr entries.  Entries
+ * 0 .. LASTNUM-1 describe token ids 1 .. LASTNUM, with alias and
+ * description copied from tok_alias[] and lex_descr[].  The final entry
+ * only has lexid set to 0 and acts as the end-of-list marker.
+ */
+Datum
+prsd_lextype(PG_FUNCTION_ARGS)
+{
+   LexDescr    *table = (LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   LexDescr    *entry = table;
+   int lexid;
+
+   for (lexid = 1; lexid <= LASTNUM; lexid++, entry++) {
+       entry->lexid = lexid;
+       entry->alias = pstrdup(tok_alias[lexid]);
+       entry->descr = pstrdup(lex_descr[lexid]);
+   }
+
+   /* entry now addresses table[LASTNUM]: write the terminator */
+   entry->lexid = 0;
+
+   PG_RETURN_POINTER(table);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_start - begin parsing a text buffer with the default parser.
+ *
+ * Passes argument 0 (a char pointer) and argument 1 (an int32, presumably
+ * the buffer length -- confirm against start_parse_str()) to
+ * start_parse_str() and returns a NULL pointer.
+ */
+Datum
+prsd_start(PG_FUNCTION_ARGS)
+{
+   char    *buf = (char*)PG_GETARG_POINTER(0);
+   int32   buflen = PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_getlexeme - fetch the next token from the default parser.
+ *
+ * Runs the lexer once (tsearch2_yylex()), then stores the current values
+ * of `token` and `tokenlen` through the pointers given as arguments 1
+ * and 2.  The value returned by the lexer is returned as the token type.
+ * Argument 0 is not read here.
+ */
+Datum
+prsd_getlexeme(PG_FUNCTION_ARGS)
+{
+   char    **tokout = (char**)PG_GETARG_POINTER(1);
+   int *lenout = (int*)PG_GETARG_POINTER(2);
+   int lextype;
+
+   /* the lexer must run before token/tokenlen are read */
+   lextype = tsearch2_yylex();
+
+   *tokout = token;
+   *lenout = tokenlen;
+
+   PG_RETURN_INT32(lextype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_end - finish a parse started with prsd_start().
+ *
+ * Calls end_parse() and returns void.  No argument is read here.
+ */
+Datum
+prsd_end(PG_FUNCTION_ARGS)
+{
+   end_parse();
+
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class predicates used when building a headline.  Each macro takes
+ * a token type id and evaluates its argument more than once, so the
+ * argument must be free of side effects.
+ *
+ * NOTE(review): the integers are presumably the token ids produced by the
+ * default lexer (the accompanying guide lists 1 = lword, 12 = blank,
+ * 22 = uint); the meaning of the other ids is not visible here -- confirm
+ * against wordparser/deflex.h.
+ *
+ * LEAVETOKEN, COMPLEXTOKEN and ENDPUNCTOKEN are not referenced in the part
+ * of this file that follows.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+/*
+ * HLIDIGNORE:   token types that prsd_headline() flags with replace=1.
+ * NONWORDTOKEN: token types that are not counted as words of the headline.
+ * NOENDTOKEN:   token types that prsd_headline() avoids ending a headline on.
+ * IDIGNORE:     used below only as part of NOENDTOKEN.
+ */
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/*
+ * hlCheck - a window of headline words, passed as the opaque check value
+ * to checkcondition_HL(): `words` points at the first word of the window
+ * and `len` is the number of words in it.
+ */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * checkcondition_HL - does query operand `val` occur in the word window?
+ *
+ * checkval is an hlCheck.  Returns true as soon as one of its `len` words
+ * has an `item` pointer equal to val, false if none does.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val)
+{
+   hlCheck *window = (hlCheck*)checkval;
+   HLWORD  *word = window->words;
+   int remaining = window->len;
+
+   while ( remaining-- > 0 ) {
+       if ( word->item == val )
+           return true;
+       word++;
+   }
+
+   return false;
+}
+
+
+/*
+ * hlCover - find the next window of words ("cover") that satisfies the query.
+ *
+ * On entry *p is the word index at which the search starts.  A candidate
+ * window is built from the positions of the query operands (items of type
+ * VAL) found in prs->words[], and is accepted when TS_execute() reports
+ * that the words inside it satisfy the query.
+ *
+ * Returns true with *p and *q set to the first and last word index of the
+ * cover.  Returns false when no cover exists at or after the starting
+ * position; *p and *q are then not meaningful.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item;
+   int pos;
+
+   /*
+    * Each pass examines one candidate window.  A rejected candidate moves
+    * the start one word forward and tries again; this is a loop rather
+    * than a self-call so that a long run of rejected candidates does not
+    * deepen the stack.
+    */
+   for(;;) {
+       pos=*p;
+       /* -1, not 0, means "nothing found": 0 is a valid word index */
+       *q=-1;
+       *p=0x7fffffff;
+
+       /*
+        * Right edge: for every operand take its first occurrence at or
+        * after pos, and keep the largest of those indexes.
+        */
+       item=GETQUERY(query);
+       for(j=0;j<query->size;j++) {
+           if ( item->type != VAL ) {
+               item++;
+               continue;
+           }
+           for(i=pos;i<prs->curwords;i++) {
+               if ( prs->words[i].item == item ) {
+                   if ( i>*q)
+                       *q = i;
+                   break;
+               }
+           }
+           item++;
+       }
+
+       if ( *q<0 )
+           return false;
+
+       /*
+        * Left edge: for every operand take its last occurrence between
+        * pos and *q, and keep the smallest of those indexes.
+        */
+       item=GETQUERY(query);
+       for(j=0;j<query->size;j++) {
+           if ( item->type != VAL ) {
+               item++;
+               continue;
+           }
+           for(i=*q;i>=pos;i--) {
+               if ( prs->words[i].item == item ) {
+                   if ( i<*p )
+                       *p=i;
+                   break;
+               }
+           }
+           item++;
+       }
+
+       if ( *p>*q )
+           return false;
+
+       {
+           hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+           if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) )
+               return true;
+       }
+
+       /* window does not satisfy the query: retry one word further on */
+       (*p)++;
+   }
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_headline - choose which words of a parsed document form the headline.
+ *
+ * Arguments:
+ *   0: HLPRSTEXT* - the parsed document; modified in place and returned
+ *   1: text*      - option string, or NULL for the defaults
+ *   2: QUERYTYPE* - the query whose terms should be shown
+ *
+ * Recognised options (keys compared case-insensitively): MaxWords,
+ * MinWords, ShortWord, StartSel, StopSel.  Raises an error unless
+ * 0 < MinWords < MaxWords and ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is trimmed or extended towards the
+ * MinWords..MaxWords range, and the candidate containing the most distinct
+ * query terms is preferred, with extra preference for candidates that do
+ * not end on a NOENDTOKEN or short word.  If the query has no cover, the
+ * headline is the beginning of the document up to MinWords words.
+ *
+ * Words inside the chosen range get in=1; of those, query terms get
+ * selected=1, repeated words skip=1 and HLIDIGNORE tokens replace=1.
+ * startsel/stopsel and their lengths are always set on return (empty
+ * strings when not given).
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults; opt may override these and the start and stop tags */
+   int min_words=15;
+   int max_words=35;
+   int shortword=3;
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       /* consume the key/value pairs, freeing each one once it is handled */
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       /* bestlen is -1 until a candidate is stored, so beste is valid whenever it is read */
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* a better candidate with a good end is already known; try the next cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words ;i++) {
+               /* word q was already counted by the loop above */
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Keep this candidate if it is the first one, if it has more query
+        * terms than the best so far and a good end, or if it has a good
+        * end while the best so far does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: fall back to the first min_words words of the text */
+       curlen=0;
+       for(i=0;i<prs->curwords && curlen<min_words ;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* the curwords bound keeps an empty document from touching words[0] */
+   for(i=bestb;i<=beste && i<prs->curwords;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   if (!prs->startsel)
+       prs->startsel=pstrdup("");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom}}}}}}}
+>Relevance Ranking for One to Three Term Queries.”
+An optional first argument allows you to tune their formula;
+for details
+see the section on ranking
+in the Reference.
+
+The rank() function offers more flexibility
+because it pays attention to the weights
+with which you have labelled lexeme positions.
+Currently tsearch2 supports four different weight labels:
+'D', the default weight;
+and 'A', 'B', and 'C'.
+All vectors created with to_tsvector()
+assign the weight 'D' to each position,
+which as the default is not displayed when you print a vector out.
+
+If you want positions with weights other than 'D',
+you have two options:
+either you can author a vector directly through the ::tsvector
+casting operation,
+as described in the following section,
+which lets you give each position whichever weight you want;
+or you can pass a vector through the setweight() function
+which sets all of its position weights to a single value.
+An example of the latter:
+
+
+
+=# SELECT vector FROM docs WHERE id = 3
+                 vector                 
+----------------------------------------
+ 'low':8 'cobbl':5 'crawl':3 'passag':9
+(1 row)
+=# SELECT setweight(vector, 'A') FROM docs WHERE id = 3
+                 setweight                  
+--------------------------------------------
+ 'low':8A 'cobbl':5A 'crawl':3A 'passag':9A
+(1 row)
+
+
+
+Merely changing all of the weights in a vector is not very useful,
+of course,
+since this still results in all words having the same weight.
+But if we parse different parts of a document separately,
+giving each section its own weight,
+and then concatenate the vectors of each part into a single vector,
+the result can be very useful.
+We can construct a simple example
+in which document titles are given greater weight
+than text in the body of the document:
+
+
+
+=# CREATE TABLE tdocs ( id SERIAL, title TEXT, doc TEXT, vector tsvector )
+=# CREATE INDEX tdocs_index ON tdocs USING gist(vector);
+=# CREATE FUNCTION instdoc(text, text) RETURNS void LANGUAGE sql AS
+  'INSERT INTO tdocs (title, doc, vector)
+   VALUES ($1, $2, setweight(to_tsvector($1), ''A'') || to_tsvector($2));'
+
+
+
+Now words from a document title will be weighted differently
+than those in the main text
+if we provide the title and body as separate arguments:
+
+
+
+=# SELECT instdoc('Spendid Chamber',
+ 'The walls are frozen rivers of orange stone.')
+ instdoc 
+---------
+ 
+(1 row)
+=# SELECT vector FROM tdocs
+                                    vector                                    
+------------------------------------------------------------------------------
+ 'wall':4 'orang':9 'river':7 'stone':10 'frozen':6 'chamber':2A 'spendid':1A
+(1 row)
+
+
+
+Note that although the necessity is unusual,
+you can constrain search terms
+to only match words from certain sections
+by following them with a colon
+and a list of the sections in which the word can occur;
+by default this list is 'ABCD'
+so that search terms match words from all sections.
+For example,
+here we search for a word both generally,
+and then looking only for specific weights:
+
+
+
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:A')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:D')
+ title | doc 
+-------+-----
+(0 rows)
+
+
+
+
+
+
+Our examples so far use tsearch2 to parse our documents into vectors.
+When your application needs absolute control over vector content,
+you will want to use direct type casting,
+which is described in the next section.
+
+
+Casting Vectors and Queries
+
+
+While tsearch2 has powerful and flexible ways
+to process documents and turn them into document vectors,
+you will sometimes want to parse documents on your own
+and place the results directly in vectors.
+Here we show you how.
+
+
+In the preceding examples,
+we used the to_tsvector() function
+when we needed a document's text reduced to a document vector.
+We saw that the function stripped whitespace and punctuation,
+eliminated common words,
+and altered suffixes to reduce words to a common form.
+While these operations are often desirable,
+and while in the sections below
+we will gain precise control over this process,
+there are occasions on which
+you want to avoid the changes that to_tsvector() makes to text
+and specify explicitly the words that you want in your vectors.
+Or you may want to create queries directly
+rather than through to_tsquery().
+
+For example,
+you may have already developed your own routine
+for reducing your documents to searchable lexemes,
+and do not want your carefully generated terms altered
+by passing them through to_tsvector().
+Or you might be developing and debugging parsing routines of your own
+that you are not ready to load into the database.
+In either case,
+you will find that direct insertion is easily accomplished
+if you simply follow some simple rules.
+
+Vectors are created directly
+when you cast a string of whitespace-separated lexemes
+to the tsvector type:
+
+
+
+=# select 'the only exit is the way you came in'::tsvector
+                     tsvector                     
+--------------------------------------------------
+ 'in' 'is' 'the' 'way' 'you' 'came' 'exit' 'only'
+(1 row)
+
+
+
+Notice that the conversion interpreted the string
+simply as a list of lexemes to be included in the vector.
+Their order was lost,
+as was the number of times each lexeme appeared.
+You must keep in mind that directly creating vectors with casting
+is not an alternate means of parsing;
+it is a way of directly entering lexemes into a vector without parsing.
+
+Queries can also be created through casting,
+if you separate lexemes with boolean operators
+rather than with whitespace.
+When creating your own vectors and queries,
+remember that the search operator @@
+finds only exact matches between query lexemes and vector lexemes
+—
+if they are not exactly the same string,
+they will not be considered a match.
+
+To include lexeme positions in your vector,
+write the positions exactly the way tsearch2 displays them
+when it prints vectors:
+by following each lexeme with a colon
+and a comma-separated list of integer positions.
+If you list a lexeme more than once,
+then all the positions listed for it are combined into a single list.
+For example,
+here are two ways of writing the same vector,
+depending on whether you mention ‘the’ twice
+or combine its positions into a list yourself:
+
+
+
+=# select 'the:1 only:2 exit:3 is:4 the:5 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+=# select 'the:1,5 only:2 exit:3 is:4 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+
+
+
+Things can get slightly tricky
+if you want to include apostrophes, backslashes, or spaces
+inside your lexemes
+(wanting to include either of the latter two would be unusual,
+but they can be included if you follow the rules).
+The main problem is that the apostrophe and backslash
+are important both to PostgreSQL when it is interpreting a string,
+and to the tsvector conversion function.
+You may want to review section
+1.1.2.1,
+“String Constants”
+in the PostgreSQL documentation before proceeding.
+
+When you cast strings directly into vectors:
+
+The string is interpreted as a whitespace-separated list of lexemes,
+ any of which can be suffixed with a colon and a list of positions.
+A lexeme can be quoted by preceding it with an apostrophe,
+ in which case it runs until the next apostrophe;
+ otherwise a lexeme ends with the first whitespace or colon encountered.
+Any character preceded by a backslash,
+ including whitespace, the apostrophe, the colon, and the backslash itself,
+ loses its normal meaning and is treated as a letter.
+ Backslashes are effective
+ both inside and outside of apostrophe-quoted lexemes.
+A lexeme can be suffixed with a list of positions
+ by appending a colon and a comma-separated list of integers,
+ each of which can itself be followed by a letter
+ to designate a position weight
+ (position weights are described below).
+
+
+Here are some example strings,
+showing the lexeme you want to insert
+together with the string that the ::tsvector operator
+needs to see,
+and how you would type that string at the PostgreSQL prompt:
+
+
+
+For the lexeme...
+you need the string...
+which you can type as:
+
+nugget
+nugget
+'nugget'
+
+won't
+won't
+'won''t'
+
+pinin'
+pinin'
+'pinin'''
+
+'bout
+\'bout
+'\\''bout'
+
+white mist
+white\ mist
+'white\\ mist'
+
+or:
+'white mist'
+'''white mist'''
+
+won't budge
+won\'t\ budge
+'won\\''t\\ budge'
+
+or:
+'won\'t budge'
+'''won\\''t budge'''
+
+back\slashed
+back\\slashed
+'back\\\\slashed'
+
+
+Remember to use the quoted quoting shown at the right
+only when typing in strings as part of a PostgreSQL query.
+If you are providing strings through a library
+that automatically quotes them
+or provides them in binary form to PostgreSQL,
+then you can use the strings in the middle instead —
+suitably quoted in the language you are using, of course.
+
+Position weights are described below
+and can be written exactly as they will be displayed
+when you select a weighted vector:
+
+
+=# select 'weighty:1,3A trivial:2B,4'::tsvector
+           tsvector            
+-------------------------------
+ 'trivial':2B,4 'weighty':1,3A
+(1 row)
+
+
+
+Note that if you are composing SQL queries
+in a scripting language like Perl or Python,
+that itself considers quotes and backslashes special,
+then you may have another quoting layer to deal with
+on top of the two layers already shown above.
+In such cases you may want to write a function
+that performs the necessary quoting for you.
+
+
+Having seen how to create vectors of your own,
+it is time to learn how the native tsearch2 parser
+reduces documents to vectors.
+
+
+Parsing and Lexing
+
+
+The previous section
+described how you can bypass the parser provided by tsearch2
+and populate your table of documents
+with vectors of your own devising.
+But for those interested in the native tsearch2 facilities,
+we present here an overview of how it goes about
+reducing documents to vectors.
+
+
+The to_tsvector() function reduces documents to vectors
+in two stages.
+First, a parser breaks the input document
+into short sequences of text called tokens.
+Each token is usually a word, space, or piece of punctuation,
+though some parsers return larger and more exotic items
+like HTML tags as single tokens.
+Each token returned by the parser
+is either discarded
+or passed to a dictionary that converts it into a lexeme.
+The resulting lexemes are collected into a vector and returned.
+
+The choice of which parser and dictionaries to_tsvector() should use
+is controlled by your choice of configuration.
+The tsearch2 module comes with several configurations,
+and you can define more of your own;
+in fact the creation of a new configuration is illustrated below,
+in the section on position weights.
+
+To learn about parsing in more detail,
+we will study this example:
+
+
+=# select to_tsvector('default',
+     'The walls extend upward for well over 100 feet.')
+                       to_tsvector                        
+----------------------------------------------------------
+ '100':8 'feet':9 'wall':2 'well':6 'extend':3 'upward':4
+(1 row)
+
+
+Unlike the to_tsvector() calls used in the above examples,
+this one specifies the 'default' configuration explicitly.
+When we called to_tsvector() in earlier examples
+with only one argument,
+it used the current configuration,
+which is chosen automatically based on your LOCALE
+if that locale is mentioned in the pg_ts_cfg table
+(which is shown under the first bullet in the description below).
+If your locale is not listed in the table,
+your attempts to use the current configuration will return:
+
+
+ERROR:  Can't find tsearch2 config by locale
+
+
+You can always change the current configuration manually
+by calling the set_curcfg() function
+described in the section on
+Configurations
+in the Reference.
+
+Each configuration serves as an index into two different tables:
+in pg_ts_cfg it determines
+which parser will break our text into tokens,
+and in pg_ts_cfgmap
+it directs each token to a dictionary for processing.
+The steps in detail are:
+
+
+
+First, our text is parsed,
+using the parser listed for our configuration in the pg_ts_cfg table.
+We are using the 'default' configuration,
+so the table tells us to use the 'default' parser:
+
+
+=# SELECT * FROM pg_ts_cfg WHERE ts_name = 'default'
+ ts_name | prs_name | locale 
+---------+----------+--------
+ default | default  | C
+(1 row)
+
+
+So our text will be parsed as though we had called:
+
+
+=# select * from parse('default',
+     'The walls extend upward for well over 100 feet.')
+
+
+This breaks the text into a list of tokens
+which are each labelled with an integer type:
+
+The₁♦₁₂
+walls₁♦₁₂
+extend₁♦₁₂
+upward₁♦₁₂
+for₁♦₁₂
+well₁♦₁₂
+over₁♦₁₂
+100₂₂♦₁₂
+feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')

+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and

+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+}

diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html

new file mode 100644 (file)

index 0000000..df0faa4


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-ref.html
@@ -0,0 +1,448 @@
+
+
+
+
+tsearch2 reference
+
+
+The tsearch2 Reference
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Reference documents the user types and functions
+of the tsearch2 module for PostgreSQL.
+An introduction to the module is provided
+by the tsearch2 Guide,
+a companion document to this one.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+Vectors and Queries
+
+Vectors and queries both store lexemes,
+but for different purposes.
+A tsvector stores the lexemes
+of the words that are parsed out of a document,
+and can also remember the position of each word.
+A tsquery specifies a boolean condition among lexemes.
+
+Any of the following functions with a configuration argument
+can use either an integer id or textual ts_name
+to select a configuration;
+if the option is omitted, then the current configuration is used.
+For more information on the current configuration,
+read the next section on Configurations.
+
+Vector Operations
+
+
+
+ to_tsvector( [configuration,]

+ document TEXT) RETURNS tsvector
+
+ Parses a document into tokens,
+ reduces the tokens to lexemes,
+ and returns a tsvector which lists the lexemes
+ together with their positions in the document.
+ For the best description of this process,
+ see the section on Parsing and Stemming
+ in the accompanying tsearch2 Guide.
+
+ strip(vector tsvector) RETURNS tsvector
+
+ Return a vector which lists the same lexemes
+ as the given vector,
+ but which lacks any information
+ about where in the document each lexeme appeared.
+ While the returned vector is thus useless for relevance ranking,
+ it will usually be much smaller.
+
+ setweight(vector tsvector, letter) RETURNS tsvector
+
+ This function returns a copy of the input vector
+ in which every location has been labelled
+ with either the letter
+ 'A', 'B', or 'C',
+ or the default label 'D'
+ (which is the default with which new vectors are created,
+ and as such is usually not displayed).
+ These labels are retained when vectors are concatenated,
+ allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+
+ vector1 || vector2
+
+ concat(vector1 tsvector, vector2 tsvector)

+ RETURNS tsvector
+
+ Returns a vector which combines the lexemes and position information
+ in the two vectors given as arguments.
+ Position weight labels (described in the previous paragraph)
+ are retained intact during the concatenation.
+ This has at least two uses.
+ First,
+ if some sections of your document
+ need to be parsed with different configurations than others,
+ you can parse them separately
+ and concatenate the resulting vectors into one.
+ Second,
+ you can weight words from some sections of your document
+ more heavily than those from others by:
+ parsing the sections into separate vectors;
+ assigning the vectors different position labels
+ with the setweight() function;
+ concatenating them into a single vector;
+ and then providing a weights argument
+ to the rank() function
+ that assigns different weights to positions with different labels.
+
+ tsvector_size(vector tsvector) RETURNS INT4
+
+ Returns the number of lexemes stored in the vector.
+
+ text::tsvector RETURNS tsvector
+
+ Directly casting text to a tsvector
+ allows you to directly inject lexemes into a vector,
+ with whatever positions and position weights you choose to specify.
+ The text should be formatted
+ like the vector would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Query Operations
+
+
+
+ to_tsquery( [configuration,]

+ querytext text) RETURNS tsquery
+
+ Parses a query,
+ which should be single words separated by the boolean operators
+ “&” and,
+ “|” or,
+ and “!” not,
+ which can be grouped using parentheses.
+ Each word is reduced to a lexeme using the current
+ or specified configuration.
+
+
+ querytree(query tsquery) RETURNS text
+
+ This might return a textual representation of the given query.
+
+ text::tsquery RETURNS tsquery
+
+ Directly casting text to a tsquery
+ allows you to directly inject lexemes into a query,
+ with whatever positions and position weight flags you choose to specify.
+ The text should be formatted
+ like the query would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Configurations
+
+A configuration specifies all of the equipment necessary
+to transform a document into a tsvector:
+the parser that breaks its text into tokens,
+and the dictionaries which then transform each token into a lexeme.
+Every call to to_tsvector() (described above)
+uses a configuration to perform its processing.
+Three configurations come with tsearch2:
+
+
+default — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the simple dictionary for all others.
+default_russian — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the ru_stem Russian Snowball dictionary for all others.
+simple — Processes both words and numbers
+ with the simple dictionary,
+ which neither discards any stop words nor alters them.
+
+
+The tsearch2 module initially chooses your current configuration
+by looking for your current locale in the locale field
+of the pg_ts_cfg table described below.
+You can manipulate the current configuration yourself with these functions:
+
+
+
+ set_curcfg( id INT | ts_name TEXT

+  ) RETURNS VOID
+
+ Set the current configuration used by to_tsvector
+ and to_tsquery.
+
+ show_curcfg() RETURNS INT4
+
+ Returns the integer id of the current configuration.
+
+
+
+Each configuration is defined by a record in the pg_ts_cfg table:
+
+create table pg_ts_cfg (
+   id      int not  null primary key,
+   ts_name     text not null,
+   prs_name    text not null,
+   locale      text
+);
+
+The id and ts_name are unique values
+which identify the configuration;
+the prs_name specifies which parser the configuration uses.
+Once this parser has split document text into tokens,
+the type of each resulting token —
+or, more specifically, the type's lex_alias
+as specified in the parser's lexem_type() table —
+is searched for together with the configuration's ts_name
+in the pg_ts_cfgmap table:
+
+create table pg_ts_cfgmap (
+   ts_name     text not null,
+   lex_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,lex_alias)
+);
+
+Those tokens whose types are not listed are discarded.
+The remaining tokens are assigned integer positions,
+starting with 1 for the first token in the document,
+and turned into lexemes with the help of the dictionaries
+whose names are given in the dict_name array for their type.
+These dictionaries are tried in order,
+stopping either with the first one to return a lexeme for the token,
+or discarding the token if no dictionary returns a lexeme for it.
+
+Parsers
+
+Each parser is defined by a record in the pg_ts_parser table:
+
+create table pg_ts_parser (
+   prs_id      int not null primary key,
+   prs_name    text not null,
+   prs_start   oid not null,
+   prs_getlexem    oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+);
+
+The prs_id and prs_name uniquely identify the parser,
+while prs_comment usually describes its name and version
+for the reference of users.
+The other items identify the low-level functions
+which make the parser operate,
+and are only of interest to someone writing a parser of their own.
+
+The tsearch2 module comes with one parser named default
+which is suitable for parsing most plain text and HTML documents.
+
+Each parser argument below
+must designate a parser with either an integer prs_id
+or a textual prs_name;
+the current parser is used when this argument is omitted.
+
+
+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID
+
+ Selects a current parser
+ which will be used when any of the following functions
+ are called without a parser as an argument.
+
+ CREATE FUNCTION lexem_type(

+  [ parser ]
+  ) RETURNS SETOF lexemtype
+
+ Returns a table which defines and describes
+ each kind of token the parser may produce as output.
+ For each token type the table gives the lexid
+ with which the parser will label each token of that type,
+ the alias which names the token type,
+ and a short description descr for the user to read.
+
+ CREATE FUNCTION parse(

+  [ parser, ] document TEXT
+  ) RETURNS SETOF lexemtype
+
+ Parses the given document and returns a series of records,
+ one for each token produced by parsing.
+ Each token includes a lexid giving its type
+ and a lexem which gives its content.
+
+
+Dictionaries
+
+Dictionaries take textual tokens as input,
+usually those produced by a parser,
+and return lexemes which are usually some reduced form of the token.
+Among the dictionaries which come installed with tsearch2 are:
+
+
+simple simply folds uppercase letters to lowercase
+ before returning the word.
+en_stem runs an English Snowball stemmer on each word
+ that attempts to reduce the various forms of a verb or noun
+ to a single recognizable form.
+ru_stem runs a Russian Snowball stemmer on each word.
+
+
+Each dictionary is defined by an entry in the pg_ts_dict table:
+
+CREATE TABLE pg_ts_dict (
+   dict_id     int not null primary key,
+   dict_name   text not null,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lemmatize  oid not null,
+   dict_comment    text
+);
+
+The dict_id and dict_name
+serve as unique identifiers for the dictionary.
+The meaning of the dict_initoption varies among dictionaries,
+but for the built-in Snowball dictionaries
+it specifies a file from which stop words should be read.
+The dict_comment is a human-readable description of the dictionary.
+The other fields are internal function identifiers
+useful only to developers trying to implement their own dictionaries.
+
+The argument named dictionary
+in each of the following functions
+should be either an integer dict_id or a textual dict_name
+identifying which dictionary should be used for the operation;
+if omitted then the current dictionary is used.
+
+
+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID
+
+ Selects a current dictionary for use by functions
+ that do not select a dictionary explicitly.
+
+ CREATE FUNCTION lexize(

+ [ dictionary, ] word text)
+ RETURNS TEXT[]
+
+ Reduces a single word to a lexeme.
+ Note that lexemes are arrays of zero or more strings,
+ since in some languages there might be several base words
+ from which an inflected form could arise.
+
+
+Ranking
+
+Ranking attempts to measure how relevant documents are to particular queries
+by inspecting the number of times each search word appears in the document,
+and whether different search terms occur near each other.
+Note that this information is only available in unstripped vectors —
+ranking functions will only return a useful result
+for a tsvector which still has position information!
+
+Both of these ranking functions
+take an integer normalization option
+that specifies whether a document's length should impact its rank.
+This is often desirable,
+since a hundred-word document with five instances of a search word
+is probably more relevant than a thousand-word document with five instances.
+The option can have the values:
+
+
+0 (the default) ignores document length.
+1 divides the rank by the logarithm of the length.
+2 divides the rank by the length itself.
+
+
+The two ranking functions currently available are:
+
+
+
+ CREATE FUNCTION rank(

+  [ weights float4[], ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS,
+ and offers the ability to weight word instances more heavily
+ depending on how you have classified them.
+ The weights specify how heavily to weight each category of word:
+ 
+ {D-weight, C-weight, B-weight, A-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(

+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string, if provided, should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or fewer.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+


diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b


--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | [email protected]
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | [email protected]
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 |  
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 |  
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 |  
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 |  
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+


diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496


--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+# Makefile template for a generated tsearch2 dictionary module.
+# gendict/config.sh copies this file into the new contrib directory and
+# rewrites the CFG_* placeholders below with sed.
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+# The whole line below is replaced by config.sh with the -I include paths
+# of the tsearch2 sources; keep it on a line of its own.
+PG_CPPFLAGS =
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+
+include $(top_srcdir)/contrib/contrib-global.mk


diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7


--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility helps people create dictionaries for the contrib/tsearch v2
+module. In particular, it has built-in support for Snowball stemmers.
+
+Programming API to tsearch2 dictionaries is described in tsearch v2 
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument of the -p option must be *the same* as the name
+      of the stemming function in stem.c (without _stem)
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample portuguese words with the stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+        Note that the dictionary and the corresponding entry in the
+   tsearch configuration are now installed, and you may modify them using
+   plain SQL commands, for example, to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercases the input word and removes it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please, check Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for additional information about "Gendict tutorial" and dictionaries.
\ No newline at end of file


diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542


--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRL/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+# Defaults for every setting that can be changed from the command line.
+dictname=
+stemmode=no
+verbose=no
+cfile=
+hfile=
+dir= 
+hasinit=no
+comment=
+prefix=
+
+# Parse the options; -n, -c, -C, -h, -d and -p take an argument,
+# -v, -i and -s are plain flags.  An unknown option prints the usage text.
+while getopts n:c:C:h:d:p:vis opt
+do
+   case "$opt" in
+       v) verbose=yes;;
+       s) stemmode=yes;;
+       i) hasinit=yes;;
+       n) dictname="$OPTARG";;
+       c) cfile="$OPTARG";;
+       h) hfile="$OPTARG";;
+       d) dir="$OPTARG";;
+       C) comment="$OPTARG";;
+       p) prefix="$OPTARG";;
+       \?) usage;;
+   esac
+done
+
+[ ${#dictname} -eq 0 ] && usage
+
+dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'`
+
+if [ $stemmode = "yes" ] ; then 
+   [ ${#prefix} -eq 0 ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi 
+
+[ ${#dir}   -eq 0 ] && dir="dict_$dictname"
+
+if [ ${#comment} -eq 0 ]; then
+   comment=null
+else
+   comment="'$comment'"
+fi
+
+ofile=
+for f in $cfile
+do
+   f=` echo $f | sed 's#c$#o#'`
+   ofile="$ofile $f"
+done
+
+if [ $stemmode = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+if [ $verbose = "yes" ]; then
+   echo Dictname: "'"$dictname"'"
+   echo Snowball stemmer: $stemmode
+   echo Has init method: $hasinit
+   [ $stemmode = "yes" ] && echo Function prefix: $prefix 
+   echo Source files: $cfile
+   echo Header files: $hfile
+   echo Object files: $ofile
+   echo Comment: $comment
+   echo Directory: ../../$dir
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build directory...  '
+if [ ! -d ../../$dir ]; then
+   if ! mkdir ../../$dir ; then 
+       echo "Can't create directory ../../$dir"
+       exit 1
+   fi 
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+else
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+fi
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+   [ $verbose = "yes" ] && echo -n 'Copy source and header files...  '
+   if [ ${#cfile} -ne 0 ] ; then
+       if ! cp $cfile ../../$dir ; then 
+           echo "Cant cp all or one of files: $cfile"
+           exit 1
+       fi
+   fi
+   if [ ${#hfile} -ne 0 ] ; then 
+       if ! cp $hfile ../../$dir ; then 
+               echo "Cant cp all or one of files: $hfile"
+           exit 1
+       fi
+   fi
+   [ $verbose = "yes" ] && echo ok
+fi
+
+
+# Create subinclude.h containing one #include line per header file.
+# The file is truncated with ':' rather than 'echo -n', because a /bin/sh
+# whose echo does not understand -n would write a literal "-n" line into
+# the header and break compilation.
+[ $verbose = "yes" ] && echo -n 'Build sub-include header...  '
+: > ../../$dir/subinclude.h
+for i in $hfile
+do
+   echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+if  [ $stemmode = "yes" ] ; then 
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   [ $verbose = "yes" ] && echo -n 'Build dictinonary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else 
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi 
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n "Build README.$dictname...  "
+if  [ $stemmode = "yes" ] ; then
+   echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname
+else
+   echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname
+fi
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+


diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/*
+ * Per-dictionary state built by the init function below and returned to the
+ * caller as an opaque pointer.
+ */
+typedef struct {
+   struct SN_env *z;           /* Snowball environment from the create_env call */
+   StopList    stoplist;       /* stop words; filled only if an option is given */
+   int (*stem)(struct SN_env * z); /* stemming function of the language */
+} DictSnowball;
+
+
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * Dictionary init method.
+ *
+ * Argument 0 (optional text) is handed to readstoplist() to load the stop
+ * word list.  Returns a pointer to a malloc'ed DictSnowball holding the stop
+ * list, a fresh Snowball environment and the stemming function.
+ * Raises ERROR when memory cannot be obtained.
+ */
+Datum 
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       /*
+        * elog(ERROR) does not return, so release everything obtained with
+        * malloc here, including the state structure itself.
+        */
+       freestoplist(&(d->stoplist));
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+


diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+HASINIT /* Per-dictionary state: only a stop word list in this template. */
+HASINIT typedef struct {
+HASINIT    StopList    stoplist;
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT /*
+HASINIT  * Dictionary init method: allocates the state with malloc, loads the
+HASINIT  * stop word list from argument 0 (optional text) and returns the state
+HASINIT  * as an opaque pointer.  Raises ERROR if malloc fails.
+HASINIT  */
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT 
+HASINIT    if ( !d )
+HASINIT        elog(ERROR, "No memory");
+HASINIT    memset(d,0,sizeof(DictExample));
+HASINIT 
+HASINIT    d->stoplist.wordop=lowerstr;
+HASINIT    
+HASINIT    /* Your INIT code */
+HASINIT    
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+HASINIT        text       *in = PG_GETARG_TEXT_P(0);
+HASINIT        readstoplist(in, &(d->stoplist));
+HASINIT        sortstoplist(&(d->stoplist));
+HASINIT        PG_FREE_IF_COPY(in, 0);
+HASINIT    }
+HASINIT 
+HASINIT    PG_RETURN_POINTER(d);
+HASINIT }
+
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+/*
+ * Dictionary lexize method.
+ *
+ * Argument 0 is the dictionary state, argument 1 the input word (char
+ * pointer) and argument 2 its length.  Returns a palloc'ed, NULL-terminated
+ * array of char pointers: it holds a copy of the word, or is empty when the
+ * dictionary has a stop list and the word is empty or found in it.
+ */
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+   char    **res=palloc(sizeof(char*)*2);
+
+   /* Your LEXIZE dictionary code */
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0]=NULL;
+HASINIT    } else 
+       res[0]=txt;
+   res[1]=NULL;
+
+   PG_RETURN_POINTER(res);
+}


diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842


--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+-- SQL template for registering a generated dictionary.
+-- Lines starting with a marker word are kept (marker stripped) or emptied
+-- by gendict/config.sh depending on the chosen configuration.
+SET search_path = public;
+BEGIN;
+
+-- init method, only for dictionaries that have one
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+-- own lexize method, only for non-Snowball dictionaries
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+-- register the dictionary: name, init function, init option,
+-- lexize function, comment
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;


diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+/*
+ * Version-1 call convention declarations for the type I/O stubs and the
+ * GiST support functions defined in this file.
+ */
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/*
+ * GETENTRY: treat the data area of varlena 'vec' as an array of GISTENTRY
+ * and return the key of element 'pos' as a GISTTYPE pointer.
+ */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/* SUMBIT: number of bits set in the single byte 'val'. */
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/*
+ * Input function of the index key type: always raises ERROR, the key type
+ * has no text representation.
+ */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+
+   /* not reached when elog raises; keeps the compiler quiet */
+   PG_RETURN_DATUM((Datum) 0);
+}
+
+/*
+ * Output function of the index key type: always raises ERROR, the key type
+ * has no text representation.
+ */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+
+   /* not reached when elog raises; keeps the compiler quiet */
+   PG_RETURN_DATUM((Datum) 0);
+}
+
+/*
+ * qsort comparator for int4 values: ascending order.
+ */
+static int
+compareint(const void *a, const void *b)
+{
+   int4        lhs = *((int4 *) a);
+   int4        rhs = *((int4 *) b);
+
+   if (lhs < rhs)
+       return -1;
+   if (lhs > rhs)
+       return 1;
+   return 0;
+}
+
+/*
+ * Sort the array 'a' of 'l' int4 values in place and squeeze out
+ * duplicates.  Returns the number of distinct values, which then occupy
+ * a[0 .. result-1].
+ *
+ * Arrays with zero or one element are returned unchanged; without the
+ * zero-length guard the code below would report one element for an empty
+ * array.
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+   int4       *ptr,
+              *res;
+
+   if (l <= 1)
+       return l;
+
+   ptr = res = a;
+
+   qsort((void *) a, l, sizeof(int4), compareint);
+
+   /* 'res' marks the last kept value, 'ptr' scans the sorted input */
+   while (ptr - a < l)
+       if (*ptr != *res)
+           *(++res) = *ptr++;
+       else
+           ptr++;
+   return res + 1 - a;
+}
+
+/*
+ * Build a signature from an array key: clear 'sign', then set the bit
+ * selected by HASH for every element of the array stored in 'a'.
+ */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+   int4       *cur = GETARR(a);
+   int4        remaining = ARRNELEM(a);
+
+   MemSet((void *) sign, 0, sizeof(BITVEC));
+   while (remaining-- > 0)
+   {
+       HASH(sign, *cur);
+       cur++;
+   }
+}
+
+/*
+ * GiST compress method.
+ *
+ * Leaf keys (tsvector values) are turned into a sorted, duplicate-free array
+ * of CRC32 values of their words (ARRKEY); if that array is larger than
+ * TOAST_INDEX_TARGET it is replaced by a fixed-size bit signature (SIGNKEY).
+ * A non-leaf signature key whose bits are all set is replaced by the compact
+ * SIGNKEY | ALLISTRUE form.  Any other entry is returned unchanged.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       /* one int4 slot per word of the tsvector */
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       /* sort the hashes and drop duplicates */
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* free the detoasted copy, if PG_DETOAST_DATUM made one */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /* keep the entry as is unless every byte of the signature is 0xff */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       /* all bits set: store only the header with the ALLISTRUE flag */
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * GiST decompress method: detoast the key.  The incoming entry is handed
+ * back untouched when detoasting produced no new copy; otherwise a fresh
+ * entry pointing at the detoasted key is returned.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+   GISTENTRY  *retval;
+
+   /* same pointer: nothing was detoasted */
+   if (key == (GISTTYPE *) DatumGetPointer(entry->key))
+       PG_RETURN_POINTER(entry);
+
+   retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+   gistentryinit(*retval, PointerGetDatum(key),
+                 entry->rel, entry->page,
+                 entry->offset, key->len, FALSE);
+
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * Half-open range [arrb, arre) of sorted int4 values to search in.
+ */
+typedef struct
+{
+   int4       *arrb;
+   int4       *arre;
+}  CHKVAL;
+
+/*
+ * Binary search: is val->val present in the sorted array described by
+ * 'checkval' (a CHKVAL)?
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+   CHKVAL     *range = (CHKVAL *) checkval;
+   int4       *lo = range->arrb;
+   int4       *hi = range->arre;
+
+   /* the wanted value, if present, always lies in [lo, hi) */
+   while (lo < hi)
+   {
+       int4       *mid = lo + (hi - lo) / 2;
+
+       if (*mid < val->val)
+           lo = mid + 1;
+       else if (*mid > val->val)
+           hi = mid;
+       else
+           return (true);
+   }
+
+   return (false);
+}
+
+/*
+ * Signature check: is the bit that val->val hashes to set in the
+ * signature passed as 'checkval'?
+ */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+   BITVECP     sign = (BITVECP) checkval;
+
+   return GETBIT(sign, HASHVAL(val->val));
+}
+
+/*
+ * GiST consistent method: can the key of the entry (argument 0) match the
+ * query (argument 1)?
+ *
+ * An empty query never matches.  Signature keys answer through the bit test
+ * (an ALLISTRUE signature always matches); array keys answer through a
+ * binary search of the hashed words.
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(
+                               ((GISTENTRY *) PG_GETARG_POINTER(0))->key
+   );
+
+   if (!query->size)
+       PG_RETURN_BOOL(false);
+
+   if (ISSIGNKEY(key))
+   {
+       if (ISALLTRUE(key))
+           PG_RETURN_BOOL(true);
+
+       /* NOTE(review): third argument presumably controls NOT handling in
+        * TS_execute; it is false for signatures - confirm in query.c */
+       PG_RETURN_BOOL(TS_execute(
+                              GETQUERY(query),
+                              (void *) GETSIGN(key), false,
+                              checkcondition_bit
+                              ));
+   }
+   else
+   {                           /* only leaf pages */
+       CHKVAL      chkval;
+
+       chkval.arrb = GETARR(key);
+       chkval.arre = chkval.arrb + ARRNELEM(key);
+       PG_RETURN_BOOL(TS_execute(
+                              GETQUERY(query),
+                              (void *) &chkval, true,
+                              checkcondition_arr
+                              ));
+   }
+}
+
+/*
+ * Merge key 'add' into the signature 'sbase'.
+ *
+ * Returns 1 when 'add' is an ALLISTRUE signature (sbase is left untouched
+ * and the caller should produce an ALLISTRUE result), 0 otherwise.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+   int4        i;
+   BITVECP     sadd;
+
+   if (!ISSIGNKEY(add))
+   {
+       /* array key: hash every element into the signature */
+       int4       *ptr = GETARR(add);
+
+       for (i = 0; i < ARRNELEM(add); i++)
+           HASH(sbase, ptr[i]);
+       return 0;
+   }
+
+   if (ISALLTRUE(add))
+       return 1;
+
+   /* plain signature: OR it in byte by byte */
+   sadd = GETSIGN(add);
+   LOOPBYTE(
+            sbase[i] |= sadd[i];
+   );
+   return 0;
+}
+
+
+/*
+ * GiST union method: build a signature key covering all entries of the
+ * entry vector (argument 0) and store its size in *size (argument 1).
+ * The result is always a SIGNKEY; it is ALLISTRUE as soon as one input
+ * key is ALLISTRUE.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   int        *size = (int *) PG_GETARG_POINTER(1);
+   BITVEC      base;
+   int4        len = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+   int4        i;
+   int4        flag = 0;
+   GISTTYPE   *result;
+
+   MemSet((void *) base, 0, sizeof(BITVEC));
+   for (i = 0; i < len; i++)
+   {
+       if (unionkey(base, GETENTRY(entryvec, i)))
+       {
+           /* nothing more to learn once an ALLISTRUE key was seen */
+           flag = ALLISTRUE;
+           break;
+       }
+   }
+
+   flag |= SIGNKEY;
+   len = CALCGTSIZE(flag, 0);
+   result = (GISTTYPE *) palloc(len);
+   *size = result->len = len;
+   result->flag = flag;
+   /* an ALLISTRUE key carries no signature bytes */
+   if (!ISALLTRUE(result))
+       memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * GiST same method: store in *result (argument 2) whether keys a and b are
+ * equal.  Signatures are compared byte by byte (two ALLISTRUE keys are
+ * equal, ALLISTRUE never equals a plain signature); array keys are compared
+ * by length and then element by element.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+   GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+   GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+   bool       *result = (bool *) PG_GETARG_POINTER(2);
+
+   if (ISSIGNKEY(a))
+   {                           /* then b also ISSIGNKEY */
+       if (ISALLTRUE(a) && ISALLTRUE(b))
+           *result = true;
+       else if (ISALLTRUE(a))
+           *result = false;
+       else if (ISALLTRUE(b))
+           *result = false;
+       else
+       {
+           int4        i;
+           BITVECP     sa = GETSIGN(a),
+                       sb = GETSIGN(b);
+
+           *result = true;
+           /* the break leaves the for loop that LOOPBYTE expands to */
+           LOOPBYTE(
+                    if (sa[i] != sb[i])
+                    {
+               *result = false;
+               break;
+           }
+           );
+       }
+   }
+   else
+   {                           /* a and b ISARRKEY */
+       int4        lena = ARRNELEM(a),
+                   lenb = ARRNELEM(b);
+
+       if (lena != lenb)
+           *result = false;
+       else
+       {
+           int4       *ptra = GETARR(a),
+                      *ptrb = GETARR(b);
+           int4        i;
+
+           *result = true;
+           for (i = 0; i < lena; i++)
+               if (ptra[i] != ptrb[i])
+               {
+                   *result = false;
+                   break;
+               }
+       }
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * Count the bits set in a whole signature.
+ */
+static int4
+sizebitvec(BITVECP sign)
+{
+   int4        size = 0,
+               i;
+
+   /* 'i' only drives LOOPBYTE; the byte itself is reached by advancing sign */
+   LOOPBYTE(
+       size += SUMBIT(*(char *) sign);
+       sign = (BITVECP) (((char *) sign) + 1);
+   );
+   return size;
+}
+
+/*
+ * Hamming distance of two signatures: the number of bit positions in which
+ * a and b differ.
+ */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+   int i,dist=0;
+
+   LOOPBIT(
+       if ( GETBIT(a,i) != GETBIT(b,i) )
+           dist++;
+   );
+   return dist;
+}
+
+/*
+ * Hamming distance of two signature keys.  An ALLISTRUE key counts as a
+ * signature with every bit set, so its distance to a plain signature is
+ * the number of bits that signature has clear.
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+   if ( ISALLTRUE(a) && ISALLTRUE(b) )
+       return 0;
+
+   if ( ISALLTRUE(a) )
+       return SIGLENBIT-sizebitvec(GETSIGN(b));
+
+   if ( ISALLTRUE(b) )
+       return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+   return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * GiST penalty method: store in *penalty (argument 2) the cost of adding
+ * the new entry (argument 1) below the original entry (argument 0).
+ *
+ * The cost is the Hamming distance between the two signatures; an array key
+ * is first converted to a signature.  When the original key is ALLISTRUE
+ * and the new key is an array, the number of clear bits of the new
+ * signature divided by SIGLENBIT+1 is used instead.
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+   GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+   float      *penalty = (float *) PG_GETARG_POINTER(2);
+   GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+   GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+   BITVECP     orig = GETSIGN(origval);
+
+   *penalty = 0.0;
+
+   if (ISARRKEY(newval)) {
+       BITVEC sign;
+       makesign(sign, newval);
+
+       /* 'orig' must not be read when origval is ALLISTRUE: it has no bytes */
+       if ( ISALLTRUE(origval) ) 
+           *penalty=((float)(SIGLENBIT-sizebitvec(sign)))/(float)(SIGLENBIT+1);
+       else 
+           *penalty=hemdistsign(sign,orig);
+   } else {
+       *penalty=hemdist(origval,newval);
+   }
+   PG_RETURN_POINTER(penalty);
+}
+
+/*
+ * Signature form of a key, cached while splitting a page: either the
+ * "all bits set" marker or an explicit signature.
+ */
+typedef struct
+{
+   bool        allistrue;
+   BITVEC      sign;
+}  CACHESIGN;
+
+/*
+ * Fill a cache slot from a key: array keys are hashed into a signature,
+ * plain signatures are copied, ALLISTRUE keys only set the marker (the
+ * sign bytes are then left unset).
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+   if (ISARRKEY(key))
+   {
+       item->allistrue = false;
+       makesign(item->sign, key);
+       return;
+   }
+
+   if (ISALLTRUE(key))
+   {
+       item->allistrue = true;
+       return;
+   }
+
+   item->allistrue = false;
+   memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+}
+
+/*
+ * WISH_F: balance term used when distributing entries during a split,
+ * -(a-b)^3 * c.
+ */
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+/* Entry position paired with its sort key for the split. */
+typedef struct
+{
+   OffsetNumber pos;
+   int4        cost;
+} SPLITCOST;
+
+/*
+ * qsort comparator for SPLITCOST: ascending by cost.
+ */
+static int
+comparecost(const void *a, const void *b)
+{
+   int4        lhs = ((SPLITCOST *) a)->cost;
+   int4        rhs = ((SPLITCOST *) b)->cost;
+
+   if (lhs < rhs)
+       return -1;
+   if (lhs > rhs)
+       return 1;
+   return 0;
+}
+
+
+/*
+ * Hamming distance of two cached signatures; an "all true" slot counts as
+ * a signature with every bit set.
+ */
+static int
+hemdistcache(CACHESIGN   *a, CACHESIGN   *b) {
+   if ( a->allistrue && b->allistrue )
+       return 0;
+
+   if ( a->allistrue )
+       return SIGLENBIT-sizebitvec(b->sign);
+
+   if ( b->allistrue )
+       return SIGLENBIT-sizebitvec(a->sign);
+
+   return hemdistsign( a->sign, b->sign );
+}
+
+/*
+ * GiST picksplit method: distribute the entries of the entry vector
+ * (argument 0) over the two halves described by the GIST_SPLITVEC
+ * (argument 1).
+ *
+ * The two entries with the largest Hamming distance become the seeds of
+ * the left and right page.  The remaining entries are visited in order of
+ * increasing |distance to seed 1 - distance to seed 2| and each is added
+ * to the side whose current union signature is closer, with WISH_F
+ * biasing the choice towards an even distribution.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+   OffsetNumber k,
+               j;
+   GISTTYPE   *datum_l,
+              *datum_r;
+   BITVECP     union_l,
+               union_r;
+   int4        size_alpha,
+               size_beta;
+   int4        size_waste,
+               waste = -1;
+   int4        nbytes;
+   OffsetNumber seed_1 = 0,
+               seed_2 = 0;
+   OffsetNumber *left,
+              *right;
+   OffsetNumber maxoff;
+   BITVECP     ptr;
+   int         i;
+   CACHESIGN  *cache;
+   SPLITCOST  *costvector;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+   /* cache the signature form of every key; slots are filled lazily below */
+   cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+   fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+   /* pick as seeds the pair of entries that are farthest apart */
+   for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+       for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+           if (k == FirstOffsetNumber)
+               fillcache(&cache[j], GETENTRY(entryvec, j));
+
+           size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+           if (size_waste > waste) {
+               waste = size_waste;
+               seed_1 = k;
+               seed_2 = j;
+           }
+       }
+   }
+
+   left = v->spl_left;
+   v->spl_nleft = 0;
+   right = v->spl_right;
+   v->spl_nright = 0;
+
+   /* no pair was examined: fall back to the first two entries */
+   if (seed_1 == 0 || seed_2 == 0) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
+   /* form initial .. */
+   if (cache[seed_1].allistrue) {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_l->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_l->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+   }
+   if (cache[seed_2].allistrue) {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_r->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_r->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+   }
+
+   /* only dereferenced while the corresponding datum is not ALLISTRUE */
+   union_l=GETSIGN(datum_l);
+   union_r=GETSIGN(datum_r);
+   /* from here on the last entry of the vector takes part as well */
+   maxoff = OffsetNumberNext(maxoff);
+   fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+   /* sort before ... */
+   costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+   for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+       costvector[j - 1].pos = j;
+       size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+       size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+       costvector[j - 1].cost = abs(size_alpha - size_beta);
+   }
+   qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+   for (k = 0; k < maxoff; k++) {
+       j = costvector[k].pos;
+       /* the seeds go to their own side without changing the unions */
+       if (j == seed_1) {
+           *left++ = j;
+           v->spl_nleft++;
+           continue;
+       } else if (j == seed_2) {
+           *right++ = j;
+           v->spl_nright++;
+           continue;
+       }
+
+       /*
+        * Distance of entry j to the left union.  cache[j].sign is already
+        * a bare signature, so it must be passed as is: GETSIGN() would
+        * skip a key header that is not there and read past the signature.
+        */
+       if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+               size_alpha=0;
+           else
+               size_alpha = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign
+               );
+       } else {
+           size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+       }
+
+       /* same for the right union */
+       if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+               size_beta=0;
+           else
+               size_beta = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign
+               );
+       } else {
+           size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+       }
+
+       if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+           /* add to the left page and widen its union */
+           if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_l) )
+                   MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_l[i] |= ptr[i];
+               );
+           }
+           *left++ = j;
+           v->spl_nleft++;
+       } else {
+           /* add to the right page and widen its union */
+           if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_r) )
+                   MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_r[i] |= ptr[i];
+               );
+           }
+           *right++ = j;
+           v->spl_nright++;
+       }
+   }
+
+   /* terminate both offset lists */
+   *right = *left = FirstOffsetNumber;
+   pfree(costvector);
+   pfree(cache);
+   v->spl_ldatum = PointerGetDatum(datum_l);
+   v->spl_rdatum = PointerGetDatum(datum_r);
+
+   PG_RETURN_POINTER(v);
+}


diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+/* number of bits in one signature byte */
+#define BITBYTE 8
+/* signature length expressed in int4 units */
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+/* signature length in bytes */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+/* signature length in bits */
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+/* a signature: SIGLEN bytes used as a bit array, and a pointer to one */
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+#define LOOPBYTE(a) \
+       for(i=0;i
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i
+                               a;\
+               }
+
+/* the byte of bit array x that holds bit number i (usable as an lvalue) */
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+/* bit number i of the single byte value x */
+#define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
+/* clear / set / read bit number i of bit array x */
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+/* NOTE: both macros evaluate their arguments more than once */
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+/* map a value onto a bit number inside a signature */
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+/* set the signature bit that val maps to */
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ *
+ * A key starts with two int4 header fields (GTHDRSIZE bytes); the payload
+ * that follows is read either as an int4 array (GETARR) or as a signature
+ * (GETSIGN), depending on the bits in 'flag'.
+ */
+typedef struct
+{
+   int4        len;
+   int4        flag;
+   char        data[1];
+}  GISTTYPE;
+
+/* bits of GISTTYPE.flag */
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+/* tests for the flag bits above; x is a pointer to a key */
+#define ISARRKEY(x) ( ((GISTTYPE*)x)->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)x)->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)x)->flag & ALLISTRUE )
+
+/* size of the two int4 header fields */
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+/*
+ * total key size: header plus 'len' int4 values for an array key, nothing
+ * for an ALLISTRUE key, or a full signature otherwise
+ */
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+/* payload right after the header, viewed as signature or as int4 array */
+#define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)x+GTHDRSIZE ) )
+/* number of int4 elements, derived from the 'len' field minus the header */
+#define ARRNELEM(x) ( ( ((GISTTYPE*)x)->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "spell.h"
+
+#define MAXNORMLEN 56
+
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/*
+ * qsort() comparator for SPELL entries: orders them by their word
+ * using plain strcmp().
+ */
+static int
+cmpspell(const void *s1, const void *s2)
+{
+   const SPELL *lhs = (const SPELL *) s1;
+   const SPELL *rhs = (const SPELL *) s2;
+
+   return strcmp(lhs->word, rhs->word);
+}
+
+/* Lower-case a NUL-terminated string in place, byte by byte, via tolower(). */
+static void
+strlower(char *str)
+{
+   unsigned char *p;
+
+   for (p = (unsigned char *) str; *p != '\0'; p++)
+       *p = tolower(*p);
+}
+
+/*
+ * Backward string compare for suffix tree operations: the strings are
+ * compared starting at their last character and moving towards the front.
+ * Returns -1, 0 or 1.  When one string is a tail of the other, the shorter
+ * one sorts first.
+ */
+static int
+strbcmp(const char *s1, const char *s2)
+{
+   int         i1 = strlen(s1) - 1;
+   int         i2 = strlen(s2) - 1;
+
+   for (; i1 >= 0 && i2 >= 0; i1--, i2--)
+   {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   if (i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+/*
+ * Like strbcmp(), but looks at no more than 'count' trailing characters.
+ * Returns 0 as soon as 'count' characters have compared equal.
+ */
+static int
+strbncmp(const char *s1, const char *s2, size_t count)
+{
+   int         i1 = strlen(s1) - 1;
+   int         i2 = strlen(s2) - 1;
+   int         left = count;
+
+   for (; i1 >= 0 && i2 >= 0 && left > 0; i1--, i2--, left--)
+   {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   if (left == 0 || i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+
+/*
+ * qsort() comparator for AFFIX entries.  Entries are grouped by type
+ * first; inside a group, type 'p' entries are ordered by repl with
+ * strcmp() and all other entries by repl with strbcmp().
+ */
+static int
+cmpaffix(const void *s1, const void *s2)
+{
+   const AFFIX *a = (const AFFIX *) s1;
+   const AFFIX *b = (const AFFIX *) s2;
+
+   if (a->type != b->type)
+       return (a->type < b->type) ? -1 : 1;
+
+   if (a->type == 'p')
+       return strcmp(a->repl, b->repl);
+   return strbcmp(a->repl, b->repl);
+}
+
+/*
+ * Append one dictionary entry (word plus its affix flags) to Conf->Spell,
+ * growing the array in steps of 20K entries when it is full.
+ *
+ * The word is copied with strdup().  At most sizeof(flag)-1 flag characters
+ * are kept so that the stored flag string is always NUL-terminated; it is
+ * searched with strchr() by FindWord().
+ *
+ * Always returns 0; running out of memory is reported through elog(ERROR).
+ */
+int 
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   SPELL *entry;
+
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell"); 
+   }
+   entry=&Conf->Spell[Conf->nspell];
+   entry->word=strdup(word);
+   if ( !entry->word ) 
+       elog(ERROR,"No memory for AddSpell");
+   /* strncpy() does not terminate the copy when the source fills the buffer */
+   strncpy(entry->flag,flag,sizeof(entry->flag)-1);
+   entry->flag[sizeof(entry->flag)-1]='\0';
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * Load an ispell-style word list from 'filename' into Conf.
+ *
+ * Each line holds a word, optionally followed by '/' and a run of ASCII
+ * letters that are stored as the word's affix flags.  The word is
+ * lower-cased and cut at the first CR or LF before being handed to
+ * AddSpell().
+ *
+ * Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int
+ImportDictionary(IspellDict * Conf, const char *filename)
+{
+   unsigned char line[BUFSIZ];
+   FILE       *fp = fopen(filename, "r");
+
+   if (fp == NULL)
+       return (1);
+
+   while (fgets(line, sizeof(line), fp))
+   {
+       const unsigned char *flag = "";
+       unsigned char *p = strchr(line, '/');
+
+       if (p != NULL)
+       {
+           /* split "word/flags": end the word here, keep the flag letters */
+           *p++ = 0;
+           flag = p;
+           while (*p && (((*p >= 'A') && (*p <= 'Z')) || ((*p >= 'a') && (*p <= 'z'))))
+               p++;
+           *p = 0;
+       }
+
+       strlower(line);
+
+       /* drop the line terminator(s) from the word */
+       for (p = line; *p; p++)
+       {
+           if (*p == '\r' || *p == '\n')
+               *p = 0;
+       }
+
+       AddSpell(Conf, line, flag);
+   }
+
+   fclose(fp);
+   return (0);
+}
+
+
+/*
+ * Look up 'word' in the sorted Conf->Spell array.
+ *
+ * The search is limited to the index range that SpellTree records for the
+ * first byte of the word.  An entry matches when its word equals 'word'
+ * and either affixflag is 0 or the entry's flag string contains affixflag.
+ *
+ * Returns a pointer to the matching entry, or NULL if there is none.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int l,c,r,resc,resl,resr, i;
+
+   i = (int)(*word) & 255;
+   l = Conf->SpellTree.Left[i];
+   r = Conf->SpellTree.Right[i];
+   /* no dictionary word starts with this byte */
+   if (l == -1) return (NULL);
+   while(l<=r){
+       /* each round tests the middle entry and both ends of the range */
+       c = (l + r) >> 1;
+       resc = strcmp(Conf->Spell[c].word, word);
+       if( (resc == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[c]);
+       }
+       resl = strcmp(Conf->Spell[l].word, word);
+       if( (resl == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[l]);
+       }
+       resr = strcmp(Conf->Spell[r].word, word);
+       if( (resr == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[r]);
+       }
+       /*
+        * Halve the range on the side indicated by the middle entry and
+        * also step the opposite end by one.  When the middle entry has
+        * the right word but not the wanted flag, only the two ends move.
+        * NOTE(review): presumably this is meant to reach equal words
+        * stored with different flags - confirm.
+        */
+       if(resc < 0){
+           l = c + 1;
+           r--;
+       } else if(resc > 0){
+           r = c - 1;
+           l++;
+       } else {
+           l++;
+           r--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * Append one affix rule to Conf->Affix, growing the array by 16 entries
+ * when it is full.
+ *
+ *  flag - affix flag character the rule belongs to
+ *  mask - regular expression source; stored with a trailing '$' for
+ *         suffix rules (type 's') and with a leading '^' otherwise
+ *  find, repl - rule strings, stored verbatim; strlen(repl) is cached
+ *         in replen
+ *  type - 's' for a suffix rule; the value is stored as given
+ *
+ * The rule is marked as not yet compiled (compile = 1).  The AFFIX string
+ * fields are fixed-size arrays, so a string that does not fit is rejected
+ * with elog(ERROR) instead of being written past the end of its array.
+ *
+ * Always returns 0.
+ */
+int 
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   AFFIX *Affix;
+
+   /* the stored mask is one character longer than 'mask' (the anchor) */
+   if ( strlen(mask)+1 >= sizeof(Affix->mask) ||
+        strlen(find) >= sizeof(Affix->find) ||
+        strlen(repl) >= sizeof(Affix->repl) )
+       elog(ERROR,"Affix rule is too long");
+
+   if(Conf->naffixes>=Conf->maffixes){
+       if(Conf->maffixes){
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }else{
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL ) 
+           elog(ERROR,"No memory for AddAffix");
+   }
+   Affix = &Conf->Affix[Conf->naffixes];
+   if (type=='s') {
+       sprintf(Affix->mask,"%s$",mask);
+   } else {
+       sprintf(Affix->mask,"^%s",mask);
+   }
+   Affix->compile = 1;
+   Affix->flag=flag;
+   Affix->type=type;
+   
+   strcpy(Affix->find,find);
+   strcpy(Affix->repl,repl);
+   Affix->replen=strlen(repl);
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy 'src' into 'dist', leaving out every blank, tab and '-' character.
+ * 'dist' is NUL-terminated and returned.
+ */
+static char *
+remove_spaces(char *dist, char *src)
+{
+   char       *out = dist;
+   const char *in;
+
+   for (in = src; *in != '\0'; in++)
+   {
+       switch (*in)
+       {
+           case ' ':
+           case '-':
+           case '\t':
+               /* dropped */
+               break;
+           default:
+               *out++ = *in;
+               break;
+       }
+   }
+   *out = 0;
+   return (dist);
+}
+
+
+/*
+ * Read affix rules from an ispell-style affix file and add them to Conf
+ * through AddAffix().
+ *
+ * Recognized lines (keywords are matched case-insensitively at the start
+ * of the line):
+ *   "suffixes" / "prefixes"  select the kind of the rules that follow
+ *   "flag <c>"               sets the flag for the rules that follow; '*'
+ *                            and blanks before the flag character are skipped
+ *   "mask > find, repl"      a rule; when only two fields are present the
+ *                            second one is used as repl and find is empty
+ * Text after '#' is ignored, rule lines are lower-cased, and blanks, tabs
+ * and '-' are removed from the three rule fields.  Lines seen before any
+ * "suffixes"/"prefixes" keyword are skipped.
+ *
+ * Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           /*
+            * strchr() also matches the terminating NUL, so the end of the
+            * line has to be tested explicitly
+            */
+           while(*s && strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* str is no longer needed and serves as scratch space here */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               /* "mask > repl": the only field after '>' is the replacement */
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+       
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+       
+   }
+   fclose(affix);
+       
+   return(0);
+}
+
+/*
+ * Sort the dictionary by word and rebuild SpellTree: for every first byte
+ * that occurs, Left[] receives the index of the first word starting with
+ * it and Right[] the index of the last one.  Left[] is -1 for bytes that
+ * start no word.
+ */
+void
+SortDictionary(IspellDict * Conf)
+{
+   int         prev = -1;
+   size_t      n;
+
+   qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspell);
+
+   for (n = 0; n < 256; n++)
+       Conf->SpellTree.Left[n] = -1;
+
+   for (n = 0; n < Conf->nspell; n++)
+   {
+       int         first = (int) (*(Conf->Spell[n].word)) & 255;
+
+       if (first != prev)
+       {
+           Conf->SpellTree.Left[first] = n;
+           prev = first;
+       }
+       Conf->SpellTree.Right[first] = n;
+   }
+}
+
+/*
+ * Sort the affix rules with cmpaffix() and rebuild the prefix and suffix
+ * index trees.  Type 'p' rules are indexed in PrefixTree by the first byte
+ * of repl; all other rules are indexed in SuffixTree by the last byte of
+ * repl, or under 0 when repl is empty.  Left[]/Right[] hold the first and
+ * last rule index for a byte, or -1 when there is none.
+ */
+void
+SortAffixes(IspellDict * Conf)
+{
+   int         lastPrefix = -1;
+   int         lastSuffix = -1;
+   size_t      n;
+
+   if (Conf->naffixes > 1)
+       qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
+
+   for (n = 0; n < 256; n++)
+   {
+       Conf->PrefixTree.Left[n] = Conf->PrefixTree.Right[n] = -1;
+       Conf->SuffixTree.Left[n] = Conf->SuffixTree.Right[n] = -1;
+   }
+
+   for (n = 0; n < Conf->naffixes; n++)
+   {
+       AFFIX      *rule = &(((AFFIX *) Conf->Affix)[n]);
+       Tree_struct *tree;
+       int        *last;
+       int         key;
+
+       if (rule->type == 'p')
+       {
+           key = (int) (*(rule->repl)) & 255;
+           tree = &Conf->PrefixTree;
+           last = &lastPrefix;
+       }
+       else
+       {
+           key = (rule->replen) ? (int) (rule->repl[rule->replen - 1]) & 255 : 0;
+           tree = &Conf->SuffixTree;
+           last = &lastSuffix;
+       }
+
+       if (*last != key)
+       {
+           tree->Left[key] = n;
+           *last = key;
+       }
+       tree->Right[key] = n;
+   }
+}
+
+/*
+ * Try one suffix rule against 'word' (of length 'len').
+ *
+ * *res receives the result of comparing the tail of the word with the
+ * rule's repl.  If the tail matches, repl is replaced by find, and the
+ * candidate is accepted when it matches the rule's mask and is found in
+ * the dictionary with the rule's flag.  The mask is compiled on first use.
+ *
+ * Returns a pstrdup'ed copy of the accepted candidate, or NULL.
+ */
+static char *
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf)
+{
+   regmatch_t  subs[2];        /* workaround for apache&linux */
+   char        newword[2 * MAXNORMLEN] = "";
+
+   *res = strbncmp(word, Affix->repl, Affix->replen);
+   if (*res != 0)
+       return NULL;
+
+   strcpy(newword, word);
+   strcpy(newword + len - Affix->replen, Affix->find);
+
+   if (Affix->compile)
+   {
+       if (regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB) != 0)
+       {
+           regfree(&(Affix->reg));
+           return (NULL);
+       }
+       Affix->compile = 0;
+   }
+
+   if (regexec(&(Affix->reg), newword, 1, subs, 0) != 0)
+       return NULL;
+   if (FindWord(Conf, newword, Affix->flag) == NULL)
+       return NULL;
+
+   return pstrdup(newword);
+}
+
+#define NS 1
+#define MAX_NORM 512
+/*
+ * Try one prefix rule against 'word'.
+ *
+ * If the word does not start with the rule's repl, the strncmp() result is
+ * returned so the caller can steer its search.  Otherwise repl is replaced
+ * by find; when the candidate matches the rule's mask it is appended to the
+ * result list if the dictionary has it with the rule's flag, and the first
+ * suffix rule indexed under 'pi' is tried on the candidate as well.  In all
+ * these cases 0 is returned.
+ *
+ * 'forms' is the start of the result list and *cur its current end; the
+ * list is kept NULL-terminated and never grows beyond MAX_NORM-1 entries.
+ */
+static int
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur)
+{
+   regmatch_t  subs[NS * 2];
+   char        newword[2 * MAXNORMLEN] = "";
+   int         cmp;
+   int         first;
+   int         lres;
+   AFFIX      *rules = Conf->Affix;
+
+   cmp = strncmp(word, Affix->repl, Affix->replen);
+   if (cmp != 0)
+       return cmp;
+
+   /* undo the prefix: put 'find' in place of the leading 'repl' */
+   strcpy(newword, Affix->find);
+   strcat(newword, word + Affix->replen);
+
+   if (Affix->compile)
+   {
+       if (regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB))
+       {
+           regfree(&(Affix->reg));
+           return (0);
+       }
+       Affix->compile = 0;
+   }
+
+   if (regexec(&(Affix->reg), newword, 1, subs, 0) != 0)
+       return 0;
+
+   if (FindWord(Conf, newword, Affix->flag) && (*cur - forms) < (MAX_NORM - 1))
+   {
+       **cur = pstrdup(newword);
+       (*cur)++;
+       **cur = NULL;
+   }
+
+   first = Conf->SuffixTree.Left[pi];
+   if (first >= 0 && (*cur - forms) < (MAX_NORM - 1))
+   {
+       **cur = CheckSuffix(newword, strlen(newword), &rules[first], &lres, Conf);
+       if (**cur)
+       {
+           (*cur)++;
+           **cur = NULL;
+       }
+   }
+   return 0;
+}
+
+
+/*
+ * Find the normal (dictionary) forms of 'word'.
+ *
+ * The word is lower-cased in place.  It is looked up as is, and then every
+ * applicable prefix and suffix rule is tried on it.
+ *
+ * Returns a palloc'ed, NULL-terminated array of palloc'ed strings holding
+ * at most MAX_NORM-1 forms, or NULL when the word is empty, longer than
+ * MAXNORMLEN, or has no normal form.
+ */
+char ** 
+NormalizeWord(IspellDict * Conf,char *word){
+   size_t len;
+   char ** forms;
+   char **cur;
+   AFFIX * Affix;
+   int ri, pi, ipi, lp, rp, cp, ls, rs;
+   int lres, rres, cres = 0;
+
+   len=strlen(word);
+   /*
+    * An empty word has no last character to index the suffix tree with,
+    * and a zero 'pi' would keep the ipi loop below from ever advancing.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return(NULL);
+
+   strlower(word);
+
+   forms=(char **) palloc(MAX_NORM*sizeof(char *));
+   cur=forms;*cur=NULL;
+
+   ri = (int)(*word) & 255;        /* first byte: selects prefix rules */
+   pi = (int)(word[len-1]) & 255;  /* last byte: selects suffix rules */
+   Affix=(AFFIX*)Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if(FindWord(Conf, word, 0)){
+       *cur=pstrdup(word);
+       cur++;*cur=NULL;
+   }
+
+   /* Find all other NORMAL forms of the 'word' */
+
+   /*
+    * Two passes: ipi = 0 selects the suffix rules indexed under 0 (rules
+    * with an empty repl), ipi = pi those indexed under the last byte.
+    */
+   for (ipi = 0; ipi <= pi; ipi += pi) {
+
+       /* check prefix */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp) {
+           /* try the middle rule and both ends, then narrow the range */
+           cp = (lp + rp) >> 1;
+           cres = 0;
+           if ((cur - forms) < (MAX_NORM-1)) {
+               cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+           }
+           if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+           }
+           if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+           }
+           if (cres < 0) {
+               rp = cp - 1;
+               lp++;
+           } else if (cres > 0) {
+               lp = cp + 1;
+               rp--;
+           } else {
+               lp++;
+               rp--;
+           }
+       }
+
+       /* check suffix: walk the rule range from both ends towards the middle */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       while (ls >= 0 && ls <= rs) {
+           if (  ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           ls++;
+           rs--;
+       } /* end while */
+     
+   } /* for ipi */
+
+   if(cur==forms){
+       pfree(forms);
+       return(NULL);
+   }
+   return(forms);
+}
+
+/*
+ * Release everything owned by an IspellDict and reset it to all zeroes.
+ *
+ * Frees the regular expressions of the affix rules that were compiled
+ * (compile == 0), every stored word, and the Affix and Spell arrays.
+ */
+void 
+FreeIspell (IspellDict *Conf) {
+  int i;
+  AFFIX *Affix = (AFFIX *)Conf->Affix;
+
+  for (i = 0; i < Conf->naffixes; i++) {
+    if (Affix[i].compile == 0) {
+      regfree(&(Affix[i].reg));
+    }
+  }
+  /* the words belong to the Spell array: walk nspell entries, not naffixes */
+  for (i = 0; i < Conf->nspell; i++) {
+   free( Conf->Spell[i].word );
+  }
+  free(Conf->Affix);
+  free(Conf->Spell);
+  memset( (void*)Conf, 0, sizeof(IspellDict) );
+  return;
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include 
+#include 
+
+/* One dictionary entry. */
+typedef struct spell_struct {
+        /* the word itself */
+        char * word; 
+        /* affix flag characters that may be applied to the word */
+        char flag[10];
+} SPELL;
+
+/* One affix rule. */
+typedef struct aff_struct {   
+        /* affix flag character the rule belongs to */
+        char flag;
+        /* rule kind character, e.g. 'p' or 's' */
+        char type;
+        /* source text of the regular expression held in 'reg' */
+        char mask[33];
+        /* rule strings */
+        char find[16];
+        char repl[16];
+        /* compiled form of 'mask' */
+        regex_t reg;
+        /* length of 'repl' */
+        size_t replen;
+        /* nonzero while 'reg' still has to be compiled from 'mask' */
+        char compile;
+} AFFIX;
+
+/*
+ * Per-byte index into a sorted array: for each of the 256 byte values,
+ * Left[] and Right[] hold the first and last array index of the entries
+ * filed under that byte.
+ */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+/* A loaded ispell dictionary: affix rules, words and their lookup indexes. */
+typedef struct {
+   /* allocated and used number of entries in Affix */
+   int maffixes;
+   int naffixes;
+   AFFIX * Affix;
+
+   /* used and allocated number of entries in Spell */
+   int nspell;
+   int mspell;
+   SPELL   *Spell;
+   /* index of Spell by first byte of the word */
+   Tree_struct SpellTree;
+   /* indexes of Affix for the prefix rules and for the suffix rules */
+   Tree_struct PrefixTree;
+   Tree_struct SuffixTree;
+
+} IspellDict;
+
+char ** NormalizeWord(IspellDict * Conf,char *word);
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#define CS_WAITKEY 0
+#define CS_INKEY   1
+#define CS_WAITEQ  2
+#define CS_WAITVALUE   3
+#define CS_INVALUE 4
+#define CS_IN2VALUE    5
+#define CS_WAITDELIM   6
+#define CS_INESC   7
+#define CS_IN2ESC  8
+
+/*
+ * Return a palloc'ed, NUL-terminated copy of the first 'len' bytes of
+ * 'ptr' with backslash escapes resolved: a backslash is dropped and the
+ * character after it is kept as is.  A backslash that is the very last
+ * character is dropped.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+   char *res=palloc(len+1), *cptr;
+   memcpy(res,ptr,len);
+   res[len]='\0';
+   /* unescape in place; the write position never gets ahead of the read one */
+   cptr = ptr = res;
+   while(*ptr) {
+       if ( *ptr == '\\' ) {
+           ptr++;
+           /* nothing follows the backslash: do not step over the terminator */
+           if ( *ptr == '\0' )
+               break;
+       }
+       *cptr=*ptr; ptr++; cptr++;
+   }
+   *cptr='\0';
+
+   return res;
+}
+
+/*
+ * Parse a configuration string of the form
+ *     key = value, key = "quoted value", ...
+ * into a palloc'ed array of Map entries returned through *m.
+ *
+ * Keys consist of alphabetic characters.  A value is either enclosed in
+ * double quotes or runs up to the next whitespace or comma; a backslash
+ * makes the following character part of the value.  The array is
+ * zero-filled and has (number of commas in the input + 2) entries, so the
+ * entries after the last parsed pair are all zeroes.
+ *
+ * Syntax errors are reported through elog(ERROR).
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+   Map *mptr;
+   char *ptr=VARDATA(in), *begin=NULL;
+   int num=0;  /* comma count; an int so that long inputs cannot wrap it */
+   int state=CS_WAITKEY;
+
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if ( *ptr==',' ) num++;
+       ptr++;
+   }
+
+   *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+   memset(mptr, 0, sizeof(Map)*(num+2) );
+   ptr=VARDATA(in);
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       /* the ctype functions need an unsigned char value, hence the casts */
+       if (state==CS_WAITKEY) {
+           if (isalpha((unsigned char) *ptr)) {
+               begin=ptr;
+               state=CS_INKEY;
+           } else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if (state==CS_INKEY) {
+           if ( isspace((unsigned char) *ptr) ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITEQ;
+           } else if ( *ptr=='=' ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITVALUE;
+           } else if ( !isalpha((unsigned char) *ptr) ) 
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITEQ ) {
+           if ( *ptr=='=' )
+               state=CS_WAITVALUE;
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITVALUE ) {
+           if ( *ptr=='"' ) {
+               begin=ptr+1;
+               state=CS_INVALUE;
+           } else if ( !isspace((unsigned char) *ptr) ) {
+               begin=ptr;
+               state=CS_IN2VALUE;
+           }
+       } else if ( state==CS_INVALUE ) {
+           if ( *ptr=='"' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_INESC;
+       } else if ( state==CS_IN2VALUE ) {
+           if ( isspace((unsigned char) *ptr) || *ptr==',' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               /* come back to the unquoted value, not to the quoted one */
+               state=CS_IN2ESC;
+       } else if ( state==CS_WAITDELIM ) {
+           if ( *ptr==',' ) 
+               state=CS_WAITKEY; 
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state == CS_INESC ) {
+           /* the escaped character is taken as is */
+           state=CS_INVALUE;
+       } else if ( state == CS_IN2ESC ) {
+           state=CS_IN2VALUE;
+       } else 
+           elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int) (ptr-VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may be ended by the end of the input */
+   if (state==CS_IN2VALUE) {
+       mptr->value = nstrdup(begin, ptr-begin);
+       mptr++;
+   } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) ) 
+       elog(ERROR,"Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery is parsed with the text parser
+ * and morphology is applied as well.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored flat: in the array of nodes the
+ * right child is always the next item and the left child is at item+item->left
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR   2
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ */
+typedef struct NODE
+{
+   /* weight mask of an operand, see get_weight() */
+   int2        weight;
+   /* token type of the node, e.g. VAL or OPR */
+   int2        type;
+   /* operator character for an operator, crc32 of the text for an operand */
+   int4        val;
+   /* offset of the operand text inside the operand buffer */
+   int2        distance;
+   /* length of the operand text */
+   int2        length;
+   /* node that was pushed just before this one */
+   struct NODE *next;
+}  NODE;
+
+/* Working state of the query parser. */
+typedef struct
+{
+   /* current position in the query string */
+   char       *buf;
+   /* WAITOPERAND or WAITOPERATOR */
+   int4        state;
+   /* number of currently open parentheses */
+   int4        count;
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   /* allocated size of op */
+   int4        lenop;
+   /* total length of the stored operands, terminators included */
+   int4        sumlen;
+   /* buffer with the operand strings, and the next free position in it */
+   char       *op;
+   char       *curop;
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional ":abcd" weight suffix at 'buf'.
+ *
+ * *weight is set to a bit mask built from the letters that follow the
+ * colon, in either case: a = 8, b = 4, c = 2, d = 1.  It is 0 when 'buf'
+ * does not start with a colon.  Returns a pointer to the first character
+ * that is not part of the suffix.
+ */
+static char *
+get_weight(char *buf, int2 *weight)
+{
+   *weight = 0;
+
+   if (*buf != ':')
+       return buf;
+
+   for (buf++; *buf; buf++)
+   {
+       int         c = tolower(*buf);
+
+       if (c == 'a')
+           *weight |= 1 << 3;
+       else if (c == 'b')
+           *weight |= 1 << 2;
+       else if (c == 'c')
+           *weight |= 1 << 1;
+       else if (c == 'd')
+           *weight |= 1;
+       else
+           break;
+   }
+
+   return buf;
+}
+
+/*
+ * get token from query string
+ *
+ * Reads the next token at state->buf and advances it.  Returns the token
+ * type: OPR with the operator character in *val, OPEN, CLOSE, VAL with the
+ * operand text in *strval / *lenval and its weight mask in *weight, END at
+ * the end of a balanced query, or ERR.  Blanks between tokens are skipped.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               /* '!' and '(' may precede an operand; the state is kept */
+               if (*(state->buf) == '!')
+               {
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* hand over to the value parser, then look for a weight */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   /* more ')' than '(' so far */
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* end of input is an error while a '(' is still open */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       /* only reached for a blank: skip it */
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Prepend a new node to the reverse polish notation list in state->str
+ * and count it in state->num.  Raises an error when 'distance' or
+ * 'lenval' reaches its limit.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+
+   node->weight = weight;
+   node->type = type;
+   node->val = val;
+   node->distance = distance;
+   node->length = lenval;
+
+   /* link the node in front of the list */
+   node->next = state->str;
+   state->str = node;
+   state->num++;
+}
+
+/*
+ * Operand handler used for tsquery parsing: the operand is stored exactly
+ * as given.  A node carrying the crc32 of the text is pushed, and the text
+ * itself is appended, NUL-terminated, to the operand buffer state->op,
+ * which is doubled in size as often as needed.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   int4        used;
+
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   /* offset at which the operand text is going to be stored */
+   used = state->curop - state->op;
+
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             used, lenval, weight);
+
+   while (used + lenval + 1 >= state->lenop)
+   {
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+   }
+   state->curop = state->op + used;
+
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop[lenval] = '\0';
+   state->curop += lenval + 1;
+   state->sumlen += lenval + 1;
+   return;
+}
+
+/*
+ * This function is used for morph parsing
+ *
+ * The operand text is run through the text parser of the configuration
+ * state->cfg_id.  Every word that comes back is pushed as an operand with
+ * the given weight, and the words are joined with '&'.  When the parser
+ * returns no word at all, a single VALTRUE operand is pushed instead.
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT         prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       /* from the second word on, AND it with what was pushed before */
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }   
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 ) 
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens with gettoken_query() and pushes operands and operators in
+ * reverse polish order.  Operands are pushed through 'pushval'; operators
+ * wait on a local stack of STACKDEPTH entries until their operand has been
+ * pushed.  The function calls itself for every parenthesized group.
+ *
+ * Returns END when the end of the input or the closing parenthesis of the
+ * current group is reached.  A syntax error is raised with elog(ERROR).
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* emit every '&' and '!' waiting on top of the stack */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               /* a '|' is emitted at once when the stack is not empty */
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* the group is handled by a nested call, then acts as an operand */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* end of this group: flush the operators still waiting */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush the operators still waiting */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/* What a query operand is checked against when a query is executed. */
+typedef struct
+{
+   /* first entry and one-past-the-last entry of the word entry array */
+   WordEntry  *arrb;
+   WordEntry  *arre;
+   /* string area that the 'pos' field of a word entry indexes into */
+   char       *values;
+   /* operand text of the query, indexed by the 'distance' field of an item */
+   char       *operand;
+}  CHKVAL;
+
+/*
+ * Compare a word entry with a query operand.  Values of different length
+ * are ordered by length (the shorter one first); values of equal length
+ * are compared with strncmp().
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(&(chkval->values[ptr->pos]),
+                  &(chkval->operand[item->distance]),
+                  item->length);
+}
+
+/*
+ * check weight info
+ *
+ * Walks the position entries stored behind the word 'val' (a uint16 count
+ * followed by the entries) and returns true if at least one of them has a
+ * weight whose bit is set in item->weight.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false; 
+}
+
+/*
+ * Is the operand 'val' present in the word entry array?
+ *
+ * Binary search over the array described by the CHKVAL passed as
+ * 'checkval'.  When the word is found and both the operand has a weight
+ * mask and the entry has positions, the result is that of
+ * checkclass_str(); otherwise finding the word is enough.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *chk = (CHKVAL *) checkval;
+   WordEntry  *lo = chk->arrb;
+   WordEntry  *hi = chk->arre;
+
+   /* the match, if any, is always inside [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = ValCompare(chk, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+       {
+           if (val->weight && mid->haspos)
+               return checkclass_str(chk, mid, val);
+           return true;
+       }
+   }
+
+   return (false);
+}
+
+/*
+ * check for boolean condition
+ *
+ * Evaluates the query stored in polish notation starting at curitem.
+ * The right operand of an operator is the next item (curitem + 1), the
+ * left operand is at curitem + curitem->left.  chkcond decides whether a
+ * single operand matches.  When calcnot is false a '!' node is not
+ * evaluated and counts as true.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       if (TS_execute(curitem + 1, checkval, calcnot, chkcond))
+           return false;
+       return true;
+   }
+
+   if (curitem->val == (int4) '&')
+   {
+       /* left operand first; the right one only if the left one holds */
+       if (!TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+           return false;
+       return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+   }
+
+   /* |-operator: the right operand is only needed if the left one fails */
+   if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+       return true;
+   return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+}
+
+/*
+ * boolean operations
+ *
+ * rexectsq is exectsq with its two arguments swapped.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(0);
+   Datum       second = PG_GETARG_DATUM(1);
+
+   return DirectFunctionCall2(exectsq, second, first);
+}
+
+/*
+ * Match a tsvector (argument 0) against a query (argument 1).
+ * An empty tsvector or an empty query never matches.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   bool        result = false;
+
+   if (val->size && query->size)
+   {
+       CHKVAL      chkval;
+
+       chkval.arrb = ARRPTR(val);
+       chkval.arre = chkval.arrb + val->size;
+       chkval.values = STRPTR(val);
+       chkval.operand = GETOPERAND(query);
+       result = TS_execute(GETQUERY(query), &chkval, true, checkcondition_str);
+   }
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * find left operand in polish notation view
+ *
+ * Walks the item array recursively starting at ptr[*pos] and fills in the
+ * 'left' field of every item: 0 for values, 1 for '!', and for binary
+ * operators the distance from the operator to the item that follows its
+ * first sub-expression.  *pos is advanced past the whole sub-expression
+ * that starts at the item it pointed to on entry.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+   if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
+   {
+       ptr[*pos].left = 0;     /* a value has no operands */
+       (*pos)++;
+   }
+   else if (ptr[*pos].val == (int4) '!')
+   {
+       ptr[*pos].left = 1;     /* the single operand directly follows */
+       (*pos)++;
+       findoprnd(ptr, pos);
+   }
+   else
+   {
+       ITEM       *curitem = &ptr[*pos];
+       int4        tmp = *pos; /* index of this operator */
+
+       (*pos)++;
+       findoprnd(ptr, pos);    /* skip the sub-expression that directly follows */
+       curitem->left = *pos - tmp; /* the other operand starts here */
+       findoprnd(ptr, pos);
+   }
+}
+
+
+/*
+ * input
+ *
+ * Parse the query text in buf into a freshly palloc'ed QUERYTYPE.
+ * pushval and cfg_id are passed on to the parser (makepol) through its
+ * arguments / the parser state.  Raises an error for an empty query.
+ * The result holds the items in polish notation followed by the operand
+ * text, with the 'left' links filled in by findoprnd().
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;              /* number of items collected so far */
+   state.str = NULL;           /* head of the linked list of collected items */
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;  /* remember the successor before freeing the node */
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');    /* NOTE(review): no bound check against sizeof(pbuf) */
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * in without morphology
+ *
+ * Parses the C string in argument 0 with queryin(), passing
+ * pushval_asis as the operand callback and 0 as cfg_id.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0));
+}
+
+/*
+ * out function
+ *
+ * INFIX is the state of the infix printer: a cursor into the item array
+ * plus a growable output buffer (see RESIZEBUF).
+ */
+typedef struct
+{
+   ITEM       *curpol;         /* item to be printed next */
+   char       *buf;            /* start of the output buffer */
+   char       *cur;            /* current write position inside buf */
+   char       *op;             /* base of the operand text (ITEM.distance is relative to it) */
+   int4        buflen;         /* allocated size of buf */
+}  INFIX;
+
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the sub-expression starting at in->curpol into in->buf and
+ * advances in->curpol past it.  Values are printed in single quotes with
+ * embedded quotes backslash-escaped, followed by ":" and the letters
+ * A-D for the weight bits that are set.  A '|' expression is wrapped in
+ * parentheses unless 'first' is true.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);  /* every byte escaped + 2 quotes + ':' and 4 weight letters */
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       if (in->curpol->type == OPR)    /* negated operator expression gets parentheses */
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm;        /* scratch printer for the right operand */
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;    /* continue behind the right operand */
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function: render a query as text in infix notation.
+ * An empty query is printed as an empty string.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       out;
+
+   if (query->size == 0)
+   {
+       char       *empty = palloc(1);
+
+       empty[0] = '\0';
+       PG_RETURN_POINTER(empty);
+   }
+
+   out.buflen = 32;
+   out.buf = (char *) palloc(sizeof(char) * out.buflen);
+   out.cur = out.buf;
+   out.cur[0] = '\0';
+   out.curpol = GETQUERY(query);
+   out.op = GETOPERAND(query);
+   infix(&out, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(out.buf);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * The query is first passed through clean_NOT_v2().  The result is the
+ * remaining query in infix form, "T" when nothing remains, or an empty
+ * text for an empty query.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   text       *res;
+   ITEM       *q;
+   int4        len;
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       PG_RETURN_POINTER(res);
+   }
+
+   q = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (q)
+   {
+       INFIX       nrm;
+
+       nrm.buflen = 32;
+       nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+       nrm.cur = nrm.buf;
+       *(nrm.cur) = '\0';
+       nrm.curpol = q;
+       nrm.op = GETOPERAND(query);
+       infix(&nrm, true);
+
+       /* copy the printed text without its terminating zero */
+       res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ);
+       VARATT_SIZEP(res) = nrm.cur - nrm.buf + VARHDRSZ;
+       strncpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf);
+       pfree(q);
+   }
+   else
+   {
+       /* nothing is left of the query: print a single 'T' */
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * Build a query from text (argument 1) for the configuration id given
+ * in argument 0; operands go through pushval_morph.  The parsed items
+ * are then filtered by clean_fakeval_v2(); if it returns nothing, an
+ * empty query (size 0) is returned.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text       *in = PG_GETARG_TEXT_P(1);
+   char       *str = text2char(in);
+   QUERYTYPE  *query;
+   ITEM       *cleaned;
+   int4        len;
+
+   PG_FREE_IF_COPY(in,1);
+
+   query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+   cleaned = clean_fakeval_v2(GETQUERY(query), &len);
+   if (cleaned)
+   {
+       memcpy((void *) GETQUERY(query), (void *) cleaned, len * sizeof(ITEM));
+       pfree(cleaned);
+   }
+   else
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+   }
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * to_tsquery() variant that takes the configuration by name (argument 0)
+ * instead of by id; argument 1 is the query text.
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+
+   /*
+    * name was fetched from argument 0, so it must be compared against
+    * argument 0; comparing against argument 1 would free it even when
+    * it is not a detoasted copy.
+    */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsquery() variant that uses the configuration id returned by
+ * get_currcfg(); argument 0 is the query text.
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum       cfg = Int32GetDatum( get_currcfg() );
+   Datum       res = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));
+
+   PG_RETURN_DATUM(res);
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * item in polish notation with back link
+ * to left operand
+ *
+ * A query is an array of ITEMs.  An item is either a value (an operand
+ * whose text lives in the operand area of the query) or an operator.
+ */
+typedef struct ITEM
+{
+   int8        type;           /* VAL, OPR, ... (see the constants in this header) */
+   int8        weight;         /* bitmask of accepted weight classes; 0 = no restriction */
+   int2        left;           /* offset (in items) from an operator to its left operand */
+   int4        val;            /* for operators: the operator character '!', '&' or '|' */
+   /* user-friendly value, must correlate with WordEntry */
+   uint32  
+       unused:1,
+       length:11,              /* length of the operand text */
+       distance:20;            /* offset of the operand text inside the operand area */
+}  ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ *
+ * Use GETQUERY() / GETOPERAND() to reach the two arrays inside data.
+ */
+typedef struct
+{
+   int4        len;            /* total size of the value in bytes (see COMPUTESIZE) */
+   int4        size;           /* number of ITEMs */
+   char        data[1];        /* start of the variable-length part */
+}  QUERYTYPE;
+
+/* size of the fixed QUERYTYPE header: the len and size fields */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+/* bytes needed for 'size' items plus 'lenofoperand' bytes of operand text */
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+/* first ITEM of query x; fully parenthesized so GETQUERY(x)->field works */
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+/* operand text of query x, stored right behind the ITEM array */
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )   /* true for the query syntax characters */
+
+#define END                0   /* returned by makepol() when parsing is finished */
+#define ERR                1   /* reported as "Syntax error" by makepol() */
+#define VAL                2   /* ITEM.type: operand */
+#define OPR                3   /* ITEM.type: operator, character in ITEM.val */
+#define OPEN           4       /* presumably the '(' token -- confirm in the parser */
+#define CLOSE          5       /* presumably the ')' token -- confirm in the parser */
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevance ranking
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+static float weights[]={0.1, 0.2, 0.4, 1.0};   /* default weight table, indexed by WordEntryPos.weight */
+
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )   /* NOTE: reads a variable named 'w' from the caller's scope */
+
+#define DEF_NORM_METHOD    0   /* method 0 = no length normalization */
+
+/*
+ * Returns a weight of a word collocation
+ *
+ * w is the distance between two positions.  The weight shrinks as the
+ * distance grows and is cut off at 1e-30 beyond a distance of 100.
+ */
+static float4 word_distance ( int4 w ) {
+   double  scaled;
+
+   if ( w>100 )
+       return 1e-30;
+
+   scaled = ((float4)w)/1.5-2;
+   return 1.0/(1.005+0.05*exp(scaled));
+}
+
+/*
+ * Length of a tsvector counted in positions: every lexeme contributes
+ * the number of its stored positions, or 1 when that number is 0.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry   *entry;
+   WordEntry   *stop = (WordEntry*)STRPTR(t);
+   int total = 0;
+
+   for (entry = ARRPTR(t); entry < stop; entry++) {
+       int npos = POSDATALEN(t, entry);
+
+       total += (npos == 0) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Order a tsvector lexeme (text base eval) against a query operand (text
+ * base qval): a shorter string sorts first, strings of equal length are
+ * ordered by their bytes.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary search for the query operand 'item' among the lexemes of
+ * tsvector t.  Returns the matching WordEntry or NULL.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+   WordEntry  *lo = ARRPTR(t);
+   WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+   /* the search window is the half-open range [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+       if (cmp == 0)
+           return mid;
+       if (cmp < 0)
+           lo = mid + 1;
+       else
+           hi = mid;
+   }
+
+   return NULL;
+}
+
+static WordEntryPos    POSNULL[]={     /* stand-in position data for lexemes stored without positions */
+   {0,0},                              /* slot overwritten by the users with the entry count (as uint16) */
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank for a query whose top-level operator is '&'.
+ *
+ * For every pair of query operands found in the tsvector, every pair of
+ * their positions contributes sqrt(weight1 * weight2 * word_distance(dist)).
+ * Contributions are combined as 1 - (1 - res) * (1 - cur).  Lexemes
+ * without position data use the POSNULL placeholder.  Returns a negative
+ * value when no pair contributed.
+ *
+ * 'w' is the weight table read through the wpos() macro.
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* element size must match the allocation above */
+   memset(pos,0,sizeof(uint16*) * q->size);
+   /* the first uint16 of POSNULL plays the role of the position count */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       /* pair this operand with every earlier operand that was found */
+       for( k=0; k<i; k++ ) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   /* equal positions only count when one side has no real position data */
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw;
+                       if ( !dist ) dist=MAXENTRYPOS;
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res;
+}
+
+/*
+ * Rank for a query whose top-level operator is not '&'.
+ *
+ * Every position of every query operand found in the tsvector
+ * contributes its weight; contributions are combined as
+ * 1 - (1 - res) * (1 - weight).  Lexemes without position data use the
+ * POSNULL placeholder.  Returns a negative value when nothing was found.
+ *
+ * 'w' is the weight table read through the wpos() macro.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* the first uint16 of POSNULL plays the role of the position count */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0; j<dimt; j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Rank tsvector t against query q with weight table w.
+ *
+ * A query whose first item is the '&' operator is ranked by
+ * calc_rank_and(), everything else by calc_rank_or().  A negative result
+ * of those (nothing found) becomes 1e-20.  'method' selects the length
+ * normalization: 0 none, 1 divide by log(length), 2 divide by length,
+ * where length is cnt_length(t); any other value raises an error.
+ * Returns 0 for an empty tsvector or query.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   if ( res < 0 )
+       res = 1e-20;
+
+   switch(method) {
+       case 0: break;
+       case 1: {
+           double lg = log((float)cnt_length(t));
+
+           /* log(1) is 0: leave the rank of a one-position document as it is */
+           if ( lg > 0 )
+               res /= lg;
+           break;
+       }
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   return res;
+}
+
+/*
+ * rank(weights float4[], tsvector, query [, method])
+ *
+ * The weight array must be one-dimensional and have at least as many
+ * elements as the default table.  A negative element selects the default
+ * weight for that slot; an element above 1.0 is rejected.  The optional
+ * fourth argument is the normalization method (see calc_rank()).
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 )
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+       elog(ERROR,"Array of weight is too short");
+
+   for(i=0; i<lengthof(weights); i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 )
+           elog(ERROR,"Weight out of range");
+   }
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method);
+
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank(tsvector, query [, method]) using the default weight table.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int method = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   float res = calc_rank(weights, txt, query, method);
+
+   PG_FREE_IF_COPY(txt, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_FLOAT4(res);
+}
+
+
+typedef struct {               /* one occurrence of a query operand in the document */
+   ITEM    *item;              /* the query operand that occurs */
+   int32   pos;                /* word position of the occurrence */
+} DocRepresentation;
+
+/*
+ * qsort comparator: orders DocRepresentation entries by position.
+ * Equal positions compare as equal (0), as the qsort contract requires.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+typedef struct {               /* window of occurrences examined by checkcondition_DR() */
+   DocRepresentation *doc;     /* first entry of the window */
+   int len;                    /* number of entries in the window */
+}  ChkDocR;
+
+/*
+ * TS_execute() callback: true when query operand 'val' occurs among the
+ * entries of the ChkDocR window passed in checkval.
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *chk = (ChkDocR*)checkval;
+   int i;
+
+   for (i = 0; i < chk->len; i++) {
+       if ( chk->doc[i].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next cover, i.e. a position range [*p, *q] of the document
+ * in which the query is satisfied.
+ *
+ * doc/len is the position-sorted list of operand occurrences, *pos the
+ * index in doc where the search starts; it is advanced so that repeated
+ * calls enumerate successive covers.  *q must hold the upper bound
+ * returned by the previous call (0 initially).  Returns false when no
+ * further cover exists.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* upper bound: the largest "first occurrence at or after *pos" over all operands */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               }
+               break;
+           }
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   }
+
+   /* lower bound: the smallest "last occurrence at or before lastpos" over all operands */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+
+   if ( *p<=*q ) {
+       /* evaluate the query on the occurrences f .. doc+lastpos only */
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q);
+   }
+
+   return false;
+}
+
+/*
+ * Build the list of all occurrences of the query operands in txt,
+ * sorted by position.  The number of entries is stored in *doclen.
+ * Lexemes without position data contribute the POSNULL placeholder
+ * position.  Returns NULL (and frees the list) when no operand occurs.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   /* the first uint16 of POSNULL plays the role of the position count */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the list until the positions of this lexeme fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0; j<dimt; j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+
+   if ( cur>0 ) {
+       if ( cur>1 )
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd(K int4, tsvector, query [, method])
+ *
+ * Cover-based rank: every cover found by Cover() adds 1.0, or K/width
+ * when the cover is wider than K positions.  K <= 0 selects the default
+ * of 4.  'method' selects the length normalization: 0 none, 1 divide by
+ * log(length), 2 divide by length, where length is cnt_length(txt).
+ * Returns 0 when no query operand occurs in the tsvector.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len,cur;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;
+   while( Cover(doc, len, query, &cur, &p, &q) )
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   switch(method) {
+       case 0: break;
+       case 1: {
+           double lg = log((float)cnt_length(txt));
+
+           /* log(1) is 0: leave the rank of a one-position document as it is */
+           if ( lg > 0 )
+               res /= lg;
+           break;
+       }
+       case 2: res /= (float)cnt_length(txt); break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd(tsvector, query [, method]): calls rank_cd() with K = -1,
+ * which makes rank_cd() fall back to its default K.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum   method;
+
+   if ( PG_NARGS() == 3 )
+       method = PG_GETARG_DATUM(2);
+   else
+       method = Int32GetDatum(DEF_NORM_METHOD);
+
+   PG_RETURN_DATUM( DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   ));
+}
+
+/**************debug*************/
+
+typedef struct {               /* one word occurrence of the document, used by get_covers() */
+   char    *w;                 /* lexeme text inside the tsvector (not zero-terminated) */
+   int2    len;                /* length of the lexeme text */
+   int2    pos;                /* word position */
+   int2    start;              /* number of the cover that starts here, 0 if none */
+   int2    finish;             /* number of the cover that ends here, 0 if none */
+} DocWord;
+
+/*
+ * qsort comparator: orders DocWord entries by position.
+ * Equal positions compare as equal (0), as the qsort contract requires.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * Debug aid: print the lexemes of a tsvector in position order and mark
+ * every cover found by Cover() with "{N " at its first word and "}N "
+ * after its last word.  Every lexeme must have position data, otherwise
+ * an error is raised.  Returns an empty text when no query operand
+ * occurs in the tsvector.
+ */
+Datum
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences of the document */
+   for(i=0; i<txt->size; i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len sums up the text plus one blank each */
+   for(i=0; i<txt->size; i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0; j<POSDATALEN(txt,&(pptr[i])); j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;
+           dw[cur].len=pptr[i].len;
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* test the bound before dereferencing dwptr */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++;
+   }
+
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) {
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+typedef struct NODE            /* node of the query tree built by maketree() */
+{
+   struct NODE *left;          /* left operand; NULL for values and for '!' */
+   struct NODE *right;         /* right operand (the only one of '!'); NULL for values */
+   ITEM       *valnode;        /* the query item this node stands for (not owned) */
+}  NODE;
+
+/*
+ * make query tree from plain view of query
+ *
+ * Builds the subtree for the item at 'in' recursively.  For an operator
+ * the right child is the next item; every operator except '!' also has
+ * a left child at in + in->left.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   node->valnode = in;
+   node->left = NULL;
+   node->right = NULL;
+
+   if (in->type != OPR)
+       return node;
+
+   node->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       node->left = maketree(in + in->left);
+   return node;
+}
+
+typedef struct                 /* growable ITEM array filled by plainnode() */
+{
+   ITEM       *ptr;            /* the array */
+   int4        len;            /* allocated number of items */
+   int4        cur;            /* number of items used so far */
+}  PLAINTREE;
+
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{                               /* append the subtree 'node' to state in polish notation and free its nodes */
+   if (state->cur == state->len)   /* array is full: double it */
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+   memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM));
+   if (node->valnode->type == VAL)
+       state->cur++;
+   else if (node->valnode->val == (int4) '!')
+   {
+       state->ptr[state->cur].left = 1;
+       state->cur++;
+       plainnode(state, node->right);
+   }
+   else
+   {
+       int4        cur = state->cur;   /* slot of this operator */
+
+       state->cur++;
+       plainnode(state, node->right);  /* right operand directly follows the operator */
+       state->ptr[cur].left = state->cur - cur;    /* left operand starts behind it */
+       plainnode(state, node->left);
+   }
+   pfree(node);
+}
+
+/*
+ * make plain view of tree from 'normal' view of tree
+ *
+ * Returns a palloc'ed ITEM array and stores its length in *len.  The
+ * result is NULL with *len == 0 when root is NULL or is neither a value
+ * nor an operator.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   pl;
+
+   pl.ptr = NULL;
+   pl.cur = 0;
+   pl.len = 16;
+
+   if (root != NULL)
+   {
+       if (root->valnode->type == VAL || root->valnode->type == OPR)
+       {
+           pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
+           plainnode(&pl, root);
+       }
+   }
+
+   *len = pl.cur;
+   return pl.ptr;
+}
+
+/*
+ * Free a query tree with all its descendants; NULL is accepted.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * clean tree for ! operator.
+ * It's useful for debug, but in
+ * other case, such view is used with search in index.
+ * Operator ! always return TRUE
+ *
+ * Returns the cleaned subtree, or NULL when the subtree reduces to
+ * "always true": a '!' node, an '|' with such an operand, or an '&'
+ * whose both operands are such.  An '&' with exactly one such operand
+ * is replaced by its other operand.  Removed nodes are freed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);     /* also frees whichever operand is still attached */
+           return NULL;
+       }
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           res = node->right;  /* keep only the surviving operand */
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;   /* keep only the surviving operand */
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a palloc'ed copy of the query starting at ptr, reduced by
+ * clean_NOT_intree(); its item count is stored in *len.  The result is
+ * NULL when nothing remains.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree = maketree(ptr);
+   NODE       *cleaned = clean_NOT_intree(tree);
+
+   return plaintree(cleaned, len);
+}
+
+#define V_UNKNOWN  0   /* subtree depends on real values */
+#define V_TRUE     1   /* subtree is always true */
+#define V_FALSE        2   /* subtree is always false */
+
+/*
+ * Clean query tree from values which is always in
+ * text (stopword)
+ *
+ * Returns the cleaned subtree.  When the subtree has a constant truth
+ * value, NULL is returned and *result is set to V_TRUE or V_FALSE;
+ * otherwise *result is left untouched.  Operators with one constant
+ * operand are folded: the constant either decides the result or the
+ * operator is replaced by its other operand.  Removed nodes are freed.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;   /* negate the constant operand */
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           res = node->right;  /* false | x == x */
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;   /* x | false == x */
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           res = node->right;  /* true & x == x */
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;   /* x & true == x */
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a palloc'ed copy of the query starting at ptr, reduced by
+ * clean_fakeval_intree(); its item count is stored in *len.  When the
+ * whole query has a constant truth value a NOTICE is emitted, *len is
+ * set to 0 and NULL is returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *root = maketree(ptr);
+   char        result = V_UNKNOWN;
+   NODE       *resroot = clean_fakeval_intree(root, &result);
+
+   if (result == V_UNKNOWN)
+       return plaintree(resroot, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/*
+ * Query rewriting helpers.  Each takes an ITEM array and an out-parameter
+ * `len` and returns an ITEM array.
+ *
+ * clean_fakeval_v2() prunes the query tree; when the whole query collapses it
+ * raises a NOTICE, stores 0 in *len and returns NULL.
+ * NOTE(review): clean_NOT_v2() presumably does the analogous clean-up for NOT
+ * operators -- confirm against rewrite.c.
+ */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/*
+ * Ordering callback for qsort()/bsearch(): compares two SNMapEntry records
+ * by their key strings using strcmp().
+ */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   SNMapEntry *lhs = (SNMapEntry*)a;
+   SNMapEntry *rhs = (SNMapEntry*)b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add a (key, value) pair to the map.
+ *
+ * The entry array is grown with realloc() when full (16 slots at first, then
+ * doubling).  The key is copied with strdup(), so the caller keeps ownership
+ * of `key`.  After the insert the whole array is re-sorted by key so that
+ * findSNMap() can use bsearch().  Allocation failures are reported through
+ * elog(ERROR).
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   /* Out of room: enlarge the array before touching the next slot. */
+   if (map->len>=map->reallen) {
+       int newcap = (map->reallen) ? 2*map->reallen : 16;
+       SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newcap);
+
+       if ( grown == NULL )
+           elog(ERROR, "No memory");
+       map->reallen=newcap;
+       map->list=grown;
+   }
+
+   slot = &map->list[ map->len ];
+   slot->key = strdup(key);
+   if ( slot->key == NULL )
+       elog(ERROR, "No memory");
+   slot->value=value;
+   map->len++;
+
+   /* Keep the array ordered; a single entry is trivially sorted. */
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Same as addSNMap(), but the key is supplied as a text value: it is
+ * converted to a C string with text2char(), inserted, and the temporary
+ * string is pfree'd afterwards (addSNMap() stores its own strdup'ed copy).
+ */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *k=text2char( key );
+   addSNMap(map, k, value);
+   pfree(k);
+}
+
+/*
+ * Look up `key` in the map with a binary search over the sorted entry array.
+ * Returns the stored value, or 0 when the map is empty or the key is absent.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe = {key, 0};
+   SNMapEntry *hit;
+
+   if ( !map->list || map->len==0 )
+       return 0;
+
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( hit == NULL )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Same as findSNMap(), but the key is supplied as a text value.  The key is
+ * converted with text2char(), looked up, and the temporary C string is
+ * pfree'd before returning.
+ *
+ * Returns the stored Oid, or 0 when the key is not present.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* was int: keep the lookup result in the type findSNMap() returns */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release everything owned by the map: each strdup'ed key, then the entry
+ * array itself.  The SNMap structure is zeroed afterwards, so it can be
+ * reused as an empty map.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       int i;
+
+       for (i = 0; i < map->len; i++) {
+           if ( map->list[i].key )
+               free(map->list[i].key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One map entry: a string key and the Oid associated with it. */
+typedef struct {
+   /* key string; addSNMap() stores a strdup'ed copy, freeSNMap() frees it */
+   char    *key;
+   /* Oid associated with the key */
+   Oid value;
+} SNMapEntry;
+
+/*
+ * String -> Oid map, kept as an array sorted by key and searched with
+ * bsearch().  A zero-filled SNMap is a valid empty map.
+ */
+typedef struct {
+   /* number of entries currently in use */
+   int len;
+   /* number of entries allocated in list */
+   int reallen;
+   /* entry array, grown with realloc() */
+   SNMapEntry  *list;
+} SNMap;
+
+/* The *_t variants take the key as a text value instead of a C string. */
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+/* Lookups return 0 when the key is not found. */
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+/* Frees all keys and the array, then zeroes the map. */
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate and initialise a stemmer environment.
+ *
+ *   S_size  number of string variables (each created with create_s())
+ *   I_size  number of integer variables (zero-initialised)
+ *   B_size  number of boolean variables (zero-initialised)
+ *
+ * Returns the new environment, or NULL if one of the calloc() calls fails;
+ * in that case everything allocated so far is released again.  The *_size
+ * fields are only set once the matching array exists, which is what lets the
+ * error path hand the partly built environment to SN_close_env().
+ *
+ * NOTE(review): create_s() is defined elsewhere and its failure behaviour is
+ * not visible here, so its result is still used unchecked -- confirm.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    /* Only arrays whose *_size field was set are touched by SN_close_env(). */
+    SN_close_env(z);
+    return NULL;
+}
+
+/*
+ * Release an environment obtained from SN_create_env(): every string
+ * variable and the S array, the I and B arrays, the current string z->p,
+ * and finally the structure itself.
+ *
+ * A NULL environment is accepted and ignored, so callers can close
+ * unconditionally even when creation failed.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    if (z == NULL) return;
+    if (z->S_size)
+    {
+        {   int i;
+            for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
+        }
+        free(z->S);
+    }
+    if (z->I_size) free(z->I);
+    if (z->B_size) free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Load a new word into the environment: replace_s() swaps the range
+ * [0, z->l) of the current string for the `size` symbols at `s`, and the
+ * cursor z->c is then reset to 0.
+ * NOTE(review): replace_s() is defined elsewhere; presumably it also updates
+ * z->l to the new length -- confirm.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/*
+ * Working state of one stemmer instance.
+ *
+ *   p            the string being stemmed (created by create_s(), freed by lose_s())
+ *   c            cursor position within p
+ *   l            forward limit: the generated code stops advancing when c >= l
+ *   lb           backward limit: the generated code stops retreating when c <= lb
+ *   bra, ket     bounds of the current slice (set at the "[" and "]" steps)
+ *   a            NOTE(review): not referenced in the code visible here -- confirm its role
+ *   S, I, B      arrays of string, integer and boolean variables of the
+ *                stemmer, with S_size, I_size and B_size elements
+ */
+struct SN_env {
+    symbol * p;
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;
+    symbol * * S;
+    int * I;
+    symbol * B;
+};
+
+/* Allocate / free an environment with the given numbers of S, I and B variables. */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+/* Replace the current string with the `size` symbols at `s` and reset the cursor to 0. */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+/*
+ * Character-group tables.  They are passed to in_grouping()/out_grouping()
+ * (and the _b variants) together with a [min, max] character-code range.
+ * NOTE(review): the grouping helpers are not defined in this file.  If, as
+ * the byte values suggest, each table is a bitmap in which bit k stands for
+ * character code (min + k), then g_v (range 97..121) is "aeiouy", g_v_WXY
+ * (range 89..121) is "Yaeiouwxy" and g_valid_LI (range 99..116) is
+ * "cdeghkmnrt" -- confirm against utilities.c.
+ */
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/*
+ * Prelude: clears the Y_found flag (B[0]) and then marks certain 'y'
+ * characters by rewriting them to 'Y', setting Y_found whenever it does so:
+ *   - a 'y' at the starting cursor position that is followed by a successful
+ *     g_v grouping test;
+ *   - repeatedly, every 'y' that directly follows a g_v match.
+ * The cursor is restored after each of the two phases.  Always returns 1.
+ * NOTE(review): this reading assumes eq_s()/in_grouping() advance the cursor
+ * on success -- confirm in the runtime sources.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Computes the two region marks p1 (I[0]) and p2 (I[1]).
+ *
+ * Both default to the limit z->l.  The scan first either matches an entry of
+ * a_0 ("gener") at the cursor, or goes past one g_v match and then one
+ * non-g_v match; the position reached becomes p1.  Going past a further g_v
+ * match and non-g_v match gives p2.  If the limit is hit during any scan the
+ * marks assigned so far are kept.  The cursor is restored; always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Backward "short vowel" test.  Returns 1 when, scanning backwards from the
+ * cursor, either
+ *   - a non-g_v_WXY match, a g_v match and a non-g_v match succeed in turn, or
+ *   - a non-g_v match and a g_v match succeed and the cursor is then at the
+ *     backward limit z->lb;
+ * otherwise returns 0.  The cursor is left wherever the tests moved it;
+ * callers save and restore it.
+ */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* Region test R1: returns 1 when the mark p1 (I[0]) is at or before the cursor, else 0. */
+static int r_R1(struct SN_env * z) {
+    return (z->I[0] <= z->c) ? 1 : 0;
+}
+
+/* Region test R2: returns 1 when the mark p2 (I[1]) is at or before the cursor, else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/*
+ * Step 1a: handles the suffixes listed in a_1 (matched backwards).
+ *   result 1 ("sses")       -> replaced by "ss"
+ *   result 2 ("ied", "ies") -> replaced by "ie" when stepping back one
+ *                              character reaches the backward limit,
+ *                              otherwise by "i"
+ *   result 3 ("s")          -> deleted, provided that after stepping back
+ *                              one character a g_v match can be found
+ *                              further back
+ *   result -1 ("ss", "us")  -> matched but left unchanged
+ * Returns 0 when nothing matches or a condition fails, otherwise 1.
+ */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1b: handles the suffixes listed in a_3 (matched backwards).
+ *   result 1 ("eed", "eedly"): requires R1, then replaces the suffix by "ee".
+ *   result 2 ("ed", "ing", "edly", "ingly"): requires a g_v match somewhere
+ *     before the suffix, deletes the suffix, then inspects the new ending
+ *     through a_2:
+ *       a_2 result 1 ("at", "bl", "iz")   -> append "e"
+ *       a_2 result 2 (doubled consonants) -> delete the last character
+ *       a_2 result 3 (empty match)        -> append "e" if the cursor is at
+ *                                            p1 (I[0]) and r_shortv() holds
+ * Returns 0 when nothing matches or a condition fails, otherwise 1.
+ */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1c: replaces a final 'y' or 'Y' by 'i' when it is preceded by a
+ * non-g_v match and that preceding character is not at the backward limit
+ * (i.e. it is not the first character of the word).  Returns 1 when the
+ * replacement was made, 0 otherwise.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/*
+ * Step 2: matches one of the suffixes in a_4 backwards; the suffix must lie
+ * in region R1.  The result code stored in a_4 selects the replacement
+ * string (s_13 .. s_28) passed to slice_from_s().  Two results carry an
+ * extra condition:
+ *   result 13 ("ogi") -> "og" only when preceded by 'l'
+ *   result 16 ("li")  -> deleted only when preceded by a g_valid_LI match
+ * Returns 0 when nothing matches or a condition fails, otherwise 1.
+ */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 3: matches one of the suffixes in a_5 backwards; the suffix must lie
+ * in region R1.  Results 1-4 replace it by "tion", "ate", "al" or "ic";
+ * result 5 ("ful", "ness") deletes it; result 6 ("ative") deletes it only
+ * when it also lies in R2.
+ * Returns 0 when nothing matches or a condition fails, otherwise 1.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 4: matches one of the suffixes in a_6 backwards; the suffix must lie
+ * in region R2.  Result 1 deletes it unconditionally; result 2 ("ion")
+ * deletes it only when it is preceded by 's' or 't'.
+ * Returns 0 when nothing matches or a condition fails, otherwise 1.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 5: handles a final 'e' or 'l' (a_7, matched backwards).
+ *   result 1 ('e'): deleted when it lies in R2, or when it lies in R1 and
+ *                   r_shortv() does not hold before it.
+ *   result 2 ('l'): deleted when it lies in R2 and is preceded by 'l'.
+ * Returns 0 when nothing matches or a condition fails, otherwise 1.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Exception list 2 (backward mode): returns 1 when an entry of a_8 matches
+ * backwards from the cursor and the match reaches the backward limit, i.e.
+ * the whole remaining word is one of the listed forms; otherwise returns 0.
+ * english_stem() skips Steps 1b-5 for such words.
+ */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (!(find_among_b(z, a_8, 8))) return 0; /* substring, line 147 */
+    z->bra = z->c; /* ], line 147 */
+    if (z->c > z->lb) return 0; /* atlimit, line 147 */
+    return 1;
+}
+
+/*
+ * Exception list 1 (forward mode): returns 1 when an entry of a_9 matches at
+ * the cursor and the match extends to the limit z->l, i.e. the whole word is
+ * one of the listed forms.  Results 1-11 replace the word with a fixed stem
+ * (s_36 .. s_46); entries with result -1 match but fall through the switch,
+ * leaving the word unchanged.  Returns 0 when the word is not in the list.
+ */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Postlude: undoes the marking done by r_prelude().  Returns 0 at once when
+ * Y_found (B[0]) is not set; otherwise scans forward from the cursor,
+ * replaces every 'Y' by 'y', and returns 1.
+ */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+/*
+ * Entry point of the English stemmer: stems the word currently held in z
+ * in place.
+ *
+ * Order of operations:
+ *   1. r_exception1(): words on the first exception list are handled there
+ *      and nothing else runs.
+ *   2. Otherwise the word must have at least 3 characters from the cursor to
+ *      the limit; if not, the function returns 0 and the word is untouched.
+ *   3. r_prelude() and r_mark_regions() run forwards.
+ *   4. The direction is switched to backwards (lb = c, c = l) and
+ *      r_Step_1a() runs; then, unless r_exception2() matches, Steps 1b, 1c,
+ *      2, 3, 4 and 5 run in that order.  Every step is a "do": its failure
+ *      is ignored and the cursor is restored afterwards.
+ *   5. The cursor goes back to lb and r_postlude() runs.
+ *
+ * Returns 1 in every case except the short-word exit described in 2.
+ */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/*
+ * Creates an environment sized for this stemmer: no string variables, two
+ * integers (I[0] = p1, I[1] = p2) and one boolean (B[0] = Y_found).
+ */
+extern struct SN_env * english_create_env(void) { return SN_create_env(0, 2, 1); }
+
+/* Releases an environment obtained from english_create_env(). */
+extern void english_close_env(struct SN_env * z) { SN_close_env(z); }
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Creation and disposal of the environment used by the English stemmer. */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/* Runs the English stemmer on the environment z. */
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+/* INT_MAX and INT_MIN, used for MAXINT / MININT below, come from limits.h */
+#include <limits.h>
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Number of bytes kept in front of every symbol buffer: two ints, read and
+   written through CAPACITY() and SIZE().  Parenthesized so that the macro
+   can be used safely inside any larger expression. */
+#define HEAD (2*sizeof(int))
+
+/* Bookkeeping slots stored just before the buffer p points to:
+   SIZE(p) is the int at index -1, CAPACITY(p) the int at index -2.
+   SIZE() and CAPACITY() remain usable as assignment targets. */
+#define SIZE(p)        (((int *)(p))[-1])
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    (((int *)(p))[-2])
+
+/* One entry of a string table searched by find_among() / find_among_b(). */
+struct among
+{
+    int s_size;                         /* number of symbols in s */
+    symbol * s;                         /* the string to be matched */
+    int substring_i;                    /* entry to try next if this one is rejected; negative = none */
+    int result;                         /* value returned by the search when this entry is accepted */
+    int (* function)(struct SN_env *);  /* extra test run on a match; 0 means no test */
+};
+
+/* Allocation and release of symbol buffers (with their HEAD prefix). */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+/* Test one symbol at the cursor against the bitmap s covering [min, max];
+   the _b variants look at the symbol before the cursor and move backwards.
+   Each returns 1 and moves the cursor on success, 0 otherwise. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+/* Same as above, but the test is a plain range check against [min, max]. */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+/* Match a literal string (eq_s*) or a symbol buffer (eq_v*) at the cursor,
+   forwards or, for the _b variants, backwards. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+/* Search the table v of v_size entries for a string at the cursor;
+   return the result field of the accepted entry, or 0. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+/* Buffer growth and replacement of the text between two positions. */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+/* Replace or delete the current slice, z->bra .. z->ket. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+/* Replace the text between bra and ket, then shift z->bra / z->ket. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+/* Copy the current slice (slice_to) or the text up to z->l (assign_to)
+   into p; the returned pointer replaces p, which may have been reallocated. */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+/* Print the buffer with its position markers to stdout. */
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point of the stemmer. */
+extern int russian_stem(struct SN_env * z);
+/* Internal routines, defined further down in this file.  Each returns
+   non-zero on success and 0 on failure. */
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Environment creation and disposal. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Strings of table a_0, which r_perfective_gerund() searches backwards.
+   NOTE(review): the byte values look like KOI8-R encoded Cyrillic - confirm. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+/* Entries are { s_size, s, substring_i, result, function }. */
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Strings of table a_1, which r_adjective() searches backwards. */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+/* Entries are { s_size, s, substring_i, result, function }; every entry
+   here yields result 1 and has no substring link. */
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Strings of table a_2, which r_adjectival() searches backwards after
+   r_adjective() has succeeded. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+/* Entries are { s_size, s, substring_i, result, function }. */
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* Strings of table a_3, which r_reflexive() searches backwards. */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+/* Entries are { s_size, s, substring_i, result, function }. */
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Strings of table a_4, which r_verb() searches backwards. */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+/* Entries are { s_size, s, substring_i, result, function }.  In r_verb()
+   result 1 requires one further preceding symbol before deleting, while
+   result 2 deletes unconditionally. */
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Strings of table a_5, which r_noun() searches backwards. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+/* Entries are { s_size, s, substring_i, result, function }; every entry
+   here yields result 1. */
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* Strings of table a_6, which r_derivational() searches backwards. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+/* Entries are { s_size, s, substring_i, result, function }. */
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Strings of table a_7, which r_tidy_up() searches backwards. */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+/* Entries are { s_size, s, substring_i, result, function }; the result
+   (1, 2 or 3) selects the branch taken in r_tidy_up(). */
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Bitmap of the grouping tested by r_mark_regions(), which passes it to
+   in_grouping() / out_grouping() with the symbol range 192..220; bit
+   (ch - 192) is set for each member.
+   NOTE(review): presumably the vowels of the language - confirm against the
+   Snowball source. */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* Single-symbol literals matched with eq_s_b() by the routines below. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Set the two region marks z->I[0] and z->I[1] by scanning forward from
+   the cursor.  Both marks start out at z->l.  I[0] is placed just past the
+   first g_v symbol; I[1] is placed after a non-g_v symbol, a g_v symbol and
+   another non-g_v symbol have then been passed in turn.  When the text runs
+   out first, the marks not yet reached keep the value z->l.  The cursor is
+   put back where it was and the routine always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    int start = z->c;
+
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+
+    /* gopast v, line 101 */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto done;
+        z->c++;
+    }
+    z->I[0] = z->c; /* setmark pV, line 101 */
+
+    /* gopast non-v, line 101 */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto done;
+        z->c++;
+    }
+    /* gopast v, line 102 */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto done;
+        z->c++;
+    }
+    /* gopast non-v, line 102 */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto done;
+        z->c++;
+    }
+    z->I[1] = z->c; /* setmark p2, line 102 */
+
+done:
+    z->c = start;
+    return 1;
+}
+
+/* Returns 1 when the cursor is at or beyond the mark z->I[1], else 0. */
+static int r_R2(struct SN_env * z) {
+    return z->I[1] <= z->c;
+}
+
+/* Look backwards for an entry of a_0 and delete it.  Entries with result 1
+   are deleted only when preceded by the symbol s_0 or s_1; entries with
+   result 2 are deleted unconditionally.  Returns 0 when nothing matched or
+   the required preceding symbol is missing, 1 otherwise. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 111 */
+    among_var = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 111 */
+
+    if (among_var == 1) {
+        int mark = z->l - z->c; /* or, line 115 */
+        if (!eq_s_b(z, 1, s_0)) {
+            /* first alternative failed: rewind and try the second */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z); /* delete, line 115 */
+    } else if (among_var == 2) {
+        slice_del(z); /* delete, line 122 */
+    }
+    return 1;
+}
+
+/* Look backwards for an entry of a_1 and delete it.
+   Returns 0 when no entry matches, 1 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 127 */
+    among_var = find_among_b(z, a_1, 26); /* substring, line 127 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 127 */
+
+    if (among_var == 1) slice_del(z); /* delete, line 136 */
+    return 1;
+}
+
+/* Remove an a_1 ending via r_adjective() and then, optionally, an a_2
+   ending in front of it.  Returns 0 when r_adjective() fails and 1
+   otherwise, whether or not the optional part matched.
+
+   The optional part is a "try": when it fails, the cursor must go back to
+   where the try began.  The saved positions carry distinct names (m_try,
+   m_or) so that the failure path inside the nested "or" rewinds to the start
+   of the try and not to the inner mark, which a shadowing local named m
+   would select. */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m_try = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m_try; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m_try; goto lab0; }
+            case 1:
+                {   int m_or = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m_or;
+                    /* both alternatives failed: the whole try fails */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_try; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Look backwards for an entry of a_3 and delete it.
+   Returns 0 when no entry matches, 1 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 168 */
+    among_var = find_among_b(z, a_3, 2); /* substring, line 168 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 168 */
+
+    if (among_var == 1) slice_del(z); /* delete, line 171 */
+    return 1;
+}
+
+/* Look backwards for an entry of a_4 and delete it.  Entries with result 1
+   are deleted only when preceded by the symbol s_4 or s_5; entries with
+   result 2 are deleted unconditionally.  Returns 0 when nothing matched or
+   the required preceding symbol is missing, 1 otherwise. */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 176 */
+    among_var = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 176 */
+
+    if (among_var == 1) {
+        int mark = z->l - z->c; /* or, line 182 */
+        if (!eq_s_b(z, 1, s_4)) {
+            /* first alternative failed: rewind and try the second */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z); /* delete, line 182 */
+    } else if (among_var == 2) {
+        slice_del(z); /* delete, line 190 */
+    }
+    return 1;
+}
+
+/* Look backwards for an entry of a_5 and delete it.
+   Returns 0 when no entry matches, 1 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 199 */
+    among_var = find_among_b(z, a_5, 36); /* substring, line 199 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 199 */
+
+    if (among_var == 1) slice_del(z); /* delete, line 206 */
+    return 1;
+}
+
+/* Look backwards for an entry of a_6 and delete it, provided the match
+   starts at or beyond the mark tested by r_R2().  Returns 0 when no entry
+   matches or the r_R2() test fails, 1 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 215 */
+    among_var = find_among_b(z, a_6, 2); /* substring, line 215 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 215 */
+    if (!r_R2(z)) return 0; /* call R2, line 215 */
+
+    if (among_var == 1) slice_del(z); /* delete, line 218 */
+    return 1;
+}
+
+/* Look backwards for an entry of a_7 and act on its result:
+     1 - delete the match, then delete one s_6 symbol if it is itself
+         preceded by an s_7 symbol (return 0 if either is missing);
+     2 - delete the match if it is preceded by an s_8 symbol, else return 0;
+     3 - delete the match.
+   Returns 0 when no entry matches, 1 on success. */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 223 */
+    among_var = find_among_b(z, a_7, 4); /* substring, line 223 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 223 */
+
+    if (among_var == 1) {
+        slice_del(z); /* delete, line 227 */
+        z->ket = z->c; /* [, line 228 */
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c; /* ], line 228 */
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z); /* delete, line 228 */
+    } else if (among_var == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z); /* delete, line 231 */
+    } else if (among_var == 3) {
+        slice_del(z); /* delete, line 233 */
+    }
+    return 1;
+}
+
+/* Stem the word held in z.  After r_mark_regions() has set the region
+   marks, the word is processed backwards from its end with the backward
+   limit raised to z->I[0]:
+     - perfective_gerund, or else (optional reflexive, then the first of
+       adjectival / verb / noun that succeeds);
+     - optional deletion of a trailing s_9 symbol;
+     - derivational;
+     - tidy_up.
+   Cursor positions are remembered as distances from z->l, because z->l
+   moves whenever a slice is deleted.  Returns 0 only if the end of the word
+   lies before z->I[0]; otherwise returns 1 with the cursor at the restored
+   backward limit. */
+extern int russian_stem(struct SN_env * z) {
+    int from_end;
+    int saved_lb;
+
+    {   int start = z->c; /* do, line 240 */
+        (void) r_mark_regions(z); /* call mark_regions, line 240 */
+        z->c = start;
+    }
+
+    /* backwards, line 241 */
+    z->lb = z->c;
+    z->c = z->l;
+
+    /* setlimit, line 241: the backward limit becomes z->I[0] */
+    from_end = z->l - z->c;
+    if (z->c < z->I[0]) return 0;
+    z->c = z->I[0]; /* tomark, line 241 */
+    saved_lb = z->lb;
+    z->lb = z->c;
+    z->c = z->l - from_end;
+
+    {   int do_mark = z->l - z->c; /* do, line 242 */
+        int or_mark = z->l - z->c; /* or, line 243 */
+        if (!r_perfective_gerund(z)) { /* call perfective_gerund, line 243 */
+            z->c = z->l - or_mark;
+            {   int try_mark = z->l - z->c; /* try, line 244 */
+                if (!r_reflexive(z)) z->c = z->l - try_mark; /* call reflexive, line 244 */
+            }
+            {   int alt_mark = z->l - z->c; /* or, line 245 */
+                if (!r_adjectival(z)) { /* call adjectival, line 245 */
+                    z->c = z->l - alt_mark;
+                    if (!r_verb(z)) { /* call verb, line 245 */
+                        z->c = z->l - alt_mark;
+                        /* last alternative: its failure only ends the "do" */
+                        (void) r_noun(z); /* call noun, line 245 */
+                    }
+                }
+            }
+        }
+        z->c = z->l - do_mark;
+    }
+
+    {   int try_mark = z->l - z->c; /* try, line 248 */
+        z->ket = z->c; /* [, line 248 */
+        if (eq_s_b(z, 1, s_9)) {
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        } else {
+            z->c = z->l - try_mark;
+        }
+    }
+
+    {   int mark = z->l - z->c; /* do, line 251 */
+        (void) r_derivational(z); /* call derivational, line 251 */
+        z->c = z->l - mark;
+    }
+
+    {   int mark = z->l - z->c; /* do, line 252 */
+        (void) r_tidy_up(z); /* call tidy_up, line 252 */
+        z->c = z->l - mark;
+    }
+
+    z->lb = saved_lb;
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create a stemmer environment for the Russian stemmer.  The three numbers
+   are handed unchanged to SN_create_env().
+   NOTE(review): they presumably size the environment's variable arrays -
+   confirm against api.c. */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Dispose of an environment by passing it to SN_close_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Creation and disposal of the environment used by the Russian stemmer. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Runs the Russian stemmer on the environment z. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* unless(C) stmt;  runs stmt when C is false */
+#define unless(C) if(!(C))
+
+/* Capacity and initial size, in symbols, of a buffer made by create_s() */
+#define CREATE_SIZE 1
+
+/* Allocate a new symbol buffer with capacity and size CREATE_SIZE.
+   The block holds HEAD bytes of bookkeeping followed by CREATE_SIZE + 1
+   symbols; the returned pointer addresses the first symbol, so the
+   bookkeeping ints sit at negative offsets (see SIZE / CAPACITY).
+   Returns NULL if the allocation fails, instead of writing through an
+   invalid pointer.  Release the buffer with lose_s(). */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL) return NULL;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Free a buffer obtained from create_s() or increase_size().  p points
+   HEAD bytes into the allocated block, so the block start is recovered
+   before calling free().  A NULL p is ignored, which avoids doing pointer
+   arithmetic on a null pointer. */
+extern void lose_s(symbol * p)
+{   if (p == NULL) return;
+    free((char *) p - HEAD);
+}
+
+/* Forward test: succeed when the symbol at the cursor lies in [min, max]
+   and its bit (ch - min) is set in the bitmap s.  On success the cursor
+   advances by one and 1 is returned; otherwise 0 with the cursor unmoved. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: succeed when the symbol before the cursor lies in
+   [min, max] and its bit (ch - min) is set in the bitmap s.  On success the
+   cursor retreats by one and 1 is returned; otherwise 0, cursor unmoved. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: succeed when the symbol at the cursor is NOT in the
+   grouping, i.e. it lies outside [min, max] or its bit is clear in s.
+   On success the cursor advances by one and 1 is returned; otherwise 0. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max)
+    {   ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward test: succeed when the symbol before the cursor is NOT in the
+   grouping, i.e. it lies outside [min, max] or its bit is clear in s.
+   On success the cursor retreats by one and 1 is returned; otherwise 0. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max)
+    {   ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward test: succeed when the symbol at the cursor lies in [min, max].
+   On success the cursor advances by one and 1 is returned; otherwise 0. */
+extern int in_range(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (!(min <= ch && ch <= max)) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: succeed when the symbol before the cursor lies in
+   [min, max].  On success the cursor retreats by one and 1 is returned;
+   otherwise 0. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (!(min <= ch && ch <= max)) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: succeed when the symbol at the cursor lies outside
+   [min, max].  On success the cursor advances by one and 1 is returned;
+   otherwise 0. */
+extern int out_range(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (min <= ch && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: succeed when the symbol before the cursor lies outside
+   [min, max].  On success the cursor retreats by one and 1 is returned;
+   otherwise 0. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (min <= ch && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Match the s_size symbols at s against the text starting at the cursor.
+   On a match the cursor moves past them and 1 is returned; otherwise 0. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{   if (z->l - z->c < s_size) return 0; /* not enough text left */
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Match the s_size symbols at s against the text ending at the cursor.
+   On a match the cursor moves back over them and 1 is returned;
+   otherwise 0. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{   if (z->c - z->lb < s_size) return 0; /* not enough text before the cursor */
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Like eq_s(), with the string and its length taken from the buffer p. */
+extern int eq_v(struct SN_env * z, symbol * p)
+{   int p_size = SIZE(p);
+    return eq_s(z, p_size, p);
+}
+
+/* Like eq_s_b(), with the string and its length taken from the buffer p. */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{   int p_size = SIZE(p);
+    return eq_s_b(z, p_size, p);
+}
+
+/* Search the table v (v_size entries) for a string starting at the cursor.
+
+   Phase 1 is a binary search over v, comparing each probed entry symbol by
+   symbol with the text; reaching z->l counts as "text smaller".
+   NOTE(review): this relies on v being sorted in the order induced by that
+   comparison - confirm against the table generator.
+   Phase 2 starts from the entry the search settled on and follows the
+   substring_i links: the first entry that was matched in full and whose
+   function is absent or returns non-zero is accepted.
+
+   Returns the result field of the accepted entry, with the cursor placed
+   just past the matched string, or 0 when the chain ends without an
+   accepted entry. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;      /* lower bound of the search interval */
+    int j = v_size; /* upper bound (exclusive) */
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    /* number of leading symbols known to match at the lower / upper bound */
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        /* extend the match with entry k; the inner i is a separate index */
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    /* walk from entry i along the substring_i links */
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* put the cursor back after the match, whatever the function did */
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/* Search the table v (v_size entries) for a string ending at the cursor.
+   Entries are compared from their last symbol towards their first against
+   the text read backwards from the cursor; reaching z->lb counts as "text
+   smaller".  After the binary search, the substring_i links are followed
+   from the entry found: the first entry matched in full whose function is
+   absent or returns non-zero is accepted.
+
+   Returns the result field of the accepted entry, with the cursor placed
+   at the start of the matched string, or 0 when the chain ends without an
+   accepted entry. */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;      /* lower bound of the search interval */
+    int j = v_size; /* upper bound (exclusive) */
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1; /* last symbol before the cursor */
+
+    struct among * w;
+
+    /* number of trailing symbols known to match at the lower / upper bound */
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        /* extend the match with entry k, from its end; inner i is a separate index */
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            /* one more round so that entry 0 is inspected as well */
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    /* walk from entry i along the substring_i links */
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* put the cursor back before the match, whatever the function did */
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Move the buffer p into a new, larger block able to hold n + 20 symbols
+   and free the old block.  The first CAPACITY(p) symbols are copied, and the
+   SIZE of p is carried over so that the returned buffer never has an
+   uninitialized size slot.  Returns the new buffer; p must not be used
+   afterwards.
+
+   The callers in this file use the result without checking it, so an
+   allocation failure is reported on stderr and ends the process, in the same
+   way slice_check() handles a fatal error, rather than writing through an
+   invalid pointer. */
+extern symbol * increase_size(symbol * p, int n)
+{   int new_size = n + 20;
+    symbol * q;
+    void * mem = malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    if (mem == NULL)
+    {   fprintf(stderr, "out of memory in increase_size\n");
+        exit(1);
+    }
+    q = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(q) = new_size;
+    SET_SIZE(q, SIZE(p));
+    memmove(q, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return q;
+}
+
+/* Replace the symbols of z->p between positions c_bra and c_ket by the
+   s_size symbols at s.  When the lengths differ, the tail of the text is
+   shifted, the buffer is grown if needed, and SIZE(z->p), z->l and the
+   cursor are adjusted: a cursor at or after c_ket moves with the tail, a
+   cursor strictly inside the replaced span is pulled back to c_bra.
+   Returns the change in length (new minus old). */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{   int old_len = SIZE(z->p);
+    int delta = s_size - (c_ket - c_bra);
+
+    if (delta != 0)
+    {   int new_len = delta + old_len;
+        if (new_len > CAPACITY(z->p)) z->p = increase_size(z->p, new_len);
+        /* slide the text after the replaced span to its new place */
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        if (z->c >= c_ket)
+            z->c += delta;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/* Verify 0 <= bra <= ket <= l <= SIZE(p).  If the slice markers are
+   inconsistent, report it on stderr, dump the buffer with debug() and
+   terminate the process with status 1. */
+static void slice_check(struct SN_env * z)
+{
+    int slice_ok = 0 <= z->bra &&
+                   z->bra <= z->ket &&
+                   z->ket <= z->l &&
+                   z->l <= SIZE(z->p);   /* this last test could be removed */
+
+    if (slice_ok) return;
+
+    fprintf(stderr, "faulty slice operation:\n");
+    debug(z, -1, 0);
+    exit(1);
+}
+
+/* Replace the current slice (z->bra .. z->ket) by the s_size symbols at s,
+   after checking that the slice markers are consistent. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the contents of the buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{   int p_size = SIZE(p);
+    slice_from_s(z, p_size, p);
+}
+
+/* Delete the current slice: it is replaced by a string of length 0. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, 0, 0);
+}
+
+/* Replace the text between bra and ket by the s_size symbols at s, then
+   shift each of z->bra and z->ket that is at or after bra by the resulting
+   change in length. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{   int shift = replace_s(z, bra, ket, s_size, s);
+    if (z->bra >= bra) z->bra += shift;
+    if (z->ket >= bra) z->ket += shift;
+}
+
+/* Same as insert_s(), with the inserted string and its length taken from
+   the buffer p. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{   int shift = replace_s(z, bra, ket, SIZE(p), p);
+    if (z->bra >= bra) z->bra += shift;
+    if (z->ket >= bra) z->ket += shift;
+}
+
+/* Copy the current slice (z->bra .. z->ket) into the buffer p, growing p
+   when its capacity is too small, and set SIZE(p) to the slice length.
+   Returns the buffer holding the copy, which replaces p. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{   int n;
+    slice_check(z);
+    n = z->ket - z->bra;
+    if (n > CAPACITY(p)) p = increase_size(p, n);
+    memmove(p, z->p + z->bra, n * sizeof(symbol));
+    SET_SIZE(p, n);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into the buffer p, growing p when
+   its capacity is too small, and set SIZE(p) to z->l.  Returns the buffer
+   holding the copy, which replaces p. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{   int n = z->l;
+    if (n > CAPACITY(p)) p = increase_size(p, n);
+    memmove(p, z->p, n * sizeof(symbol));
+    SET_SIZE(p, n);
+    return p;
+}
+
+/* Print the buffer of z on stdout between single quotes, with markers in
+   front of the symbol they refer to: '{' lb, '[' bra, '|' cursor, ']' ket,
+   '}' l.  Zero symbols are shown as '#'.  When number is non-negative the
+   line starts with number, line_count and the buffer size.  The loop runs
+   up to and including SIZE so that markers at the very end are printed. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{   int pos;
+    int size = SIZE(z->p);
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,size);
+    for (pos = 0; pos <= size; pos++)
+    {   if (pos == z->lb) printf("{");
+        if (pos == z->bra) printf("[");
+        if (pos == z->c) printf("|");
+        if (pos == z->ket) printf("]");
+        if (pos == z->l) printf("}");
+        if (pos < size)
+        {   int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case a NUL-terminated string in place, byte by byte, with
+ * tolower() and return the pointer that was passed in.
+ */
+char*
+lowerstr(char *str) {
+   unsigned char *cur;
+
+   for(cur=(unsigned char*)str; *cur!='\0'; cur++)
+       *cur = tolower(*cur);
+   return str;
+}
+
+/*
+ * Release everything a StopList owns: each stored word, then the pointer
+ * array itself, and finally zero the whole structure (this also clears
+ * the wordop callback, as it always did).
+ *
+ * Words and array are released with free(), matching the
+ * realloc()/strdup() allocations made by readstoplist().
+ */
+void
+freestoplist(StopList *s) {
+   if ( s->stop ) {
+       int i;
+
+       /* only the first s->len slots are known to hold valid pointers */
+       for(i=0; i<s->len; i++)
+           if ( s->stop[i] )
+               free(s->stop[i]);
+       /* the array is freed exactly once, after all of its elements */
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Read a stop-word file into *s.
+ *
+ * "in" holds the file name; when it is NULL or empty nothing is read and
+ * the list ends up empty (s->stop == NULL, s->len == 0).  Every non-empty
+ * line becomes one entry; if s->wordop is set it is applied to each entry
+ * before it is stored.  The array and the words are allocated with
+ * realloc()/strdup().
+ *
+ * Errors are raised with elog(ERROR): the file cannot be opened, or
+ * memory runs out.  On the out-of-memory path everything read so far is
+ * released first.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+       bool    nomem=false;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t  buflen=strlen(buf);
+
+           /*
+            * Strip the line terminator only if one is present: the last
+            * line of a file may lack it, and it must not lose its final
+            * character.
+            */
+           while( buflen>0 && ( buf[buflen-1]=='\n' || buf[buflen-1]=='\r' ) )
+               buf[--buflen] = '\0';
+           if ( buflen==0 ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /* realloc failure leaves the old array valid */
+                   nomem=true;
+                   break;
+               }
+               stop=tmp;
+           }
+
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               nomem=true;
+               break;
+           }
+           if ( s->wordop )
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++;
+       }
+       fclose(hin);
+
+       if ( nomem ) {
+           /*
+            * The words read so far are reachable only through the local
+            * array (s->stop is assigned at the very end), so release
+            * them here before reporting the error.
+            */
+           while( s->len > 0 )
+               free( stop[ --(s->len) ] );
+           if ( stop )
+               free(stop);
+           freestoplist(s);
+           elog(ERROR,"Not enough memory");
+       }
+       pfree(filename);
+   }
+   s->stop=stop;
+}
+
+/*
+ * qsort()/bsearch() comparator for arrays of C strings: each argument is
+ * the address of a char* element, and the strings they point to are
+ * ordered with strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   const char *left = *(char**)a;
+   const char *right = *(char**)b;
+
+   return strcmp(left, right);
+}
+
+/*
+ * Sort the stop words with qsort() and comparestr() so that
+ * searchstoplist() can bsearch() them.  An empty list is left alone.
+ */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is found in the stop list with bsearch().
+ * If s->wordop is set it is applied to key first, even when the list
+ * turns out to be empty.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop )
+       key=(*(s->wordop))(key);
+   if ( !s->stop || s->len<=0 )
+       return false;
+   return ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) != NULL ) ? true : false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Load the tsearch configuration whose pg_ts_cfg row has oid "id" into
+ * *cfg (which is zeroed first).
+ *
+ * All lookups go through SPI with lazily prepared, saved plans:
+ *   1. read prs_name from pg_ts_cfg for this oid (error if there is none);
+ *   2. read (token id, dictionary-name array) pairs from pg_ts_cfgmap
+ *      joined with token_type(prs_name), highest token id first;
+ *   3. after SPI_finish(), translate the parser name and every dictionary
+ *      name into ids with name2id_prs() / name2id_dict().
+ *
+ * cfg->map is indexed by token id and has cfg->len entries; it and the
+ * per-token dict_id arrays are malloc()ed.  Failures in this function are
+ * reported with ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer(
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
+       );
+       /* copied into TopMemoryContext: it is still used after SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+
+       cfg->id=id;
+   } else
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second plan takes (text parser name, oid cfg id); pars[1] is still id */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /*
+        * Rows are ordered by tokid descending, so the first row carries
+        * the largest token id and fixes the size of the map.
+        */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull now refers to the dict_name column fetched last */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) );
+
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       /* check the allocation before touching it, as is done for cfg->map */
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* dictionary names are kept as text copies until SPI is finished */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       }
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a )
+           pfree(a);
+   }
+
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every stored dictionary name with the dictionary's id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * File-local cache of the configurations loaded so far.
+ */
+typedef struct {
+   /* entry returned by the most recent findcfg() lookup, or NULL */
+   TSCfgInfo   *last_cfg;
+   /* number of entries of list that are in use */
+   int     len;
+   /* number of entries list has room for */
+   int     reallen;
+   /* realloc()ed array of configurations, kept sorted by id (see findcfg) */
+   TSCfgInfo   *list;
+   /* configuration name -> oid map used by name2id_cfg() */
+   SNMap       name2id_map;
+} CFGList;
+
+/* the single cache instance; starts out empty */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Throw away the whole configuration cache: the name -> id map, every
+ * per-token dict_id array, every map, and the list itself.  CList is
+ * zeroed afterwards, so the next findcfg() starts from scratch.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is an
+ * unsigned type, so the difference of two ids converted to int has the
+ * wrong sign whenever they are more than INT_MAX apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached TSCfgInfo for configuration "id", loading it with
+ * init_cfg() on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search of the
+ * sorted cache, then a fresh load appended to the cache (which is grown
+ * by doubling and re-sorted afterwards).  The returned pointer refers
+ * into CList.list.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /*
+    * last chance: load it.  last_cfg stays NULL while init_cfg() runs, so
+    * that an error raised in there cannot leave it pointing at a
+    * half-initialized slot which the fast path above would then return.
+    */
+   CList.last_cfg = NULL;
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp )
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+   init_cfg(id, &(CList.list[CList.len]));
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return findcfg(id); /* qsort changed order!! */
+}
+
+
+/*
+ * Translate a configuration name into the oid of its pg_ts_cfg row.
+ *
+ * The name -> id map in CList is consulted first; on a miss the oid is
+ * fetched through SPI (with a lazily prepared, saved plan) and added to
+ * the map.  Raises elog(ERROR) when the plan cannot be prepared, the
+ * query fails, no row matches, or the oid comes back NULL.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   /* a zero result from the map is treated as "not cached" */
+   Oid id=findSNMap_t( &(CList.name2id_map), name );
+   
+   if ( id ) 
+       return id;
+   
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* at most one row is requested */
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull ) 
+           elog(ERROR, "Null id for tsearch config");
+   } else 
+       elog(ERROR, "No tsearch config");
+   SPI_finish();
+   /* remember the answer for the next call */
+   addSNMap_t( &(CList.name2id_map), name, id );
+   return id;
+}
+
+
+/*
+ * Parse buf (buflen bytes) with the parser of configuration cfg and
+ * append the normalized lexemes to prs.
+ *
+ * For every token the parser returns, the dictionaries mapped to its type
+ * are tried in order; the first one that returns a non-NULL result wins.
+ * Each lexeme it produced is stored in prs->words (the array is doubled
+ * when full) with the current position.  The lexeme strings themselves
+ * are kept - only the array returned by the dictionary is pfree()d.
+ * Token types without a map entry are skipped.
+ *
+ * Raises elog(ERROR) for a token of MAXSTRLEN bytes or more.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser hands back the token through lemm/lenlemm; type 0 ends the loop */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           /* lenlemm is an int, so it travels as an int4 datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           /* norms is a NULL-terminated array of lexeme strings */
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one raw token to the headline word list: the list is doubled
+ * until there is room, the new entry is zeroed, and it receives the
+ * token type, its length and a palloc()ed copy of the buflen bytes at
+ * buf (no terminating NUL is added).
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* taken after the loop, once the array has stopped moving */
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;
+}
+
+/*
+ * Link the most recently added headline word (prs->words[curwords-1])
+ * to the query items it matches.
+ *
+ * Every VAL item of the query whose operand equals the buflen bytes at
+ * buf is a match.  The first match is stored in the word's item field;
+ * each further match appends a copy of the word that points at that item
+ * and is flagged "repeated" (the copy shares the word string).
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* make room for the worst case: one extra entry per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc() above may have moved the
+    * array, which would leave an earlier pointer dangling.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) {
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else
+               word->item=item;
+       }
+       item++;
+   }
+}
+
+/*
+ * Headline variant of parsetext_v2(): parse buf with the parser of cfg,
+ * record every raw token in prs with hladdword(), and link the tokens to
+ * the items of query they match.
+ *
+ * For a token whose type has a map entry, the mapped dictionaries are
+ * tried in order; the first one returning a non-NULL result wins and each
+ * lexeme it produced is looked up in the query with hlfinditem().  The
+ * lexemes and the array holding them are pfree()d afterwards.
+ *
+ * Raises elog(ERROR) for a token of MAXSTRLEN bytes or more.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser hands back the token through lemm/lenlemm; type 0 ends the loop */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       /* every token is kept, matched or not */
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len )
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           /* lenlemm is an int, so it travels as an int4 datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           /* norms is a NULL-terminated array of lexeme strings */
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from the word list in prs and return it as a
+ * palloc()ed text value.
+ *
+ * Only words with "in" set and neither "skip" nor "repeated" set are
+ * emitted: a word flagged "replace" becomes a single space, any other
+ * word is copied verbatim and, when "selected", wrapped in
+ * prs->startsel / prs->stopsel.  The word strings are pfree()d along the
+ * way, except for "repeated" entries.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   text *out;
+   /* allocated size of out, in bytes; doubled on demand */
+   int len=128;
+   /* write position inside out */
+   char *ptr;
+   HLWORD  *wrd=prs->words;
+
+   out = (text*)palloc( len );
+   /* the text payload starts right after the varlena header */
+   ptr=((char*)out) + VARHDRSZ;
+
+   while( wrd - prs->words < prs->curwords ) {
+       /* grow until the word plus both selection markers is sure to fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
+           /* remember the offset: repalloc may move the buffer */
+           int dist = ptr - ((char*)out);
+           len*= 2;
+           out = (text *) repalloc(out, len);
+           ptr=((char*)out) + dist;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace ) {
+               *ptr=' ';
+               ptr++;
+           } else {
+               if (wrd->selected) {
+                   memcpy(ptr,prs->startsel,prs->startsellen);
+                   ptr+=prs->startsellen;
+               }
+               memcpy(ptr,wrd->word,wrd->len);
+               ptr+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(ptr,prs->stopsel,prs->stopsellen);
+                   ptr+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries are copies made by hlfinditem() and share the
+        * word pointer of their original, so only originals are freed */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+
+       wrd++;
+   }
+
+   /* total size, header included */
+   VARATT_SIZEP(out)=ptr - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the id of the current configuration.
+ *
+ * If current_cfg_id has been set (by set_curcfg() or by an earlier call)
+ * it is returned directly.  Otherwise the pg_ts_cfg row whose locale
+ * column equals the current LC_CTYPE locale name is looked up through
+ * SPI, and its oid is remembered in current_cfg_id.  Raises elog(ERROR)
+ * when the plan cannot be prepared, the query fails, or no row matches.
+ */
+int  
+get_currcfg(void) {
+   Oid arg[1]={ TEXTOID };
+   const char *curlocale;
+   Datum pars[1];
+   bool isnull;
+   int stat;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* a NULL second argument only queries the locale, it does not change it */
+   curlocale = setlocale(LC_CTYPE, NULL);
+   pars[0] = PointerGetDatum( char2text((char*)curlocale) );
+   /* at most one row is requested */
+   stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1);
+
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 )
+       /* NOTE(review): isnull is filled in here but never examined */
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   pfree(DatumGetPointer(pars[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current
+ * one.  findcfg() is called first, so current_cfg_id is only changed if
+ * that call returns.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfg_id = PG_GETARG_OID(0);
+
+   findcfg(cfg_id);
+   current_cfg_id = cfg_id;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: same as set_curcfg(), but the configuration is given by
+ * name; the name is translated with name2id_cfg() and the work is
+ * delegated to set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text *cfgname=PG_GETARG_TEXT_P(0);
+   Oid cfg_id=name2id_cfg(cfgname);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum(cfg_id) );
+   PG_FREE_IF_COPY(cfgname, 0);
+   PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable: return the id of the current configuration as reported
+ * by get_currcfg().
+ */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   int cfg_id = get_currcfg();
+
+   PG_RETURN_OID( cfg_id );
+}
+
+/*
+ * SQL-callable: emit the notice "TSearch cache cleaned".
+ *
+ * NOTE(review): nothing is reset here directly; presumably ts_error()
+ * itself clears the caches before reporting - confirm against its
+ * definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* the dictionaries mapped to one token type, in the order they are tried */
+typedef struct {
+   /* number of entries in dict_id */
+   int len;
+   /* dictionary ids, stored as Datums */
+   Datum   *dict_id;
+} ListDictionary;
+
+/* one loaded configuration */
+typedef struct {
+   /* oid of the pg_ts_cfg row */
+   Oid id;
+   /* id of the parser the configuration uses */
+   Oid prs_id;
+   /* number of entries in map */
+   int len;
+   /* per-token-type dictionary lists, indexed by token type */
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+/* configuration cache interface (ts_cfg.c) */
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* one normalized lexeme produced by parsetext_v2() */
+typedef struct {
+   /* length of word in bytes */
+        uint16          len;
+   /* parsetext_v2() fills in pos.pos; apos is presumably used by later
+    * processing of the word list - TODO confirm */
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        char       *word;
+   /* set to 0 by parsetext_v2() */
+   uint32  alen;
+}       WORD;
+   
+/* growable list of WORDs plus the running position counter */
+typedef struct {
+        WORD       *words;
+   /* allocated entries */
+        int4            lenwords;
+   /* used entries */
+        int4            curwords;
+   /* incremented by parsetext_v2() for each token a dictionary recognized */
+   int4        pos;
+}       PRSTEXT;
+
+/* one raw token of a document being turned into a headline */
+typedef struct {
+   /* length of word in bytes */
+        uint16    len;
+   /*
+    * Flags read by genhl(): a word is output only if in is set and skip
+    * and repeated are clear; replace turns it into a single space;
+    * selected wraps it in the start/stop markers.  repeated marks the
+    * extra copies made by hlfinditem().
+    */
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   /* token type as returned by the parser */
+   uint8   type;
+   /* token text, len bytes, not NUL-terminated */
+        char      *word;
+   /* query item this token matched, or NULL */
+   ITEM      *item;
+}       HLWORD;
+   
+/* growable list of HLWORDs plus the selection markers used by genhl() */
+typedef struct {
+        HLWORD       *words;
+   /* allocated entries */
+        int4            lenwords;
+   /* used entries */
+        int4            curwords;
+   /* marker texts put before / after a selected word, with their lengths */
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+/* headline support (ts_cfg.c) */
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+/* document parsing and current-configuration lookup (ts_cfg.c) */
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+/*
+ * Input function for tsstat.  The argument is never examined: every call
+ * yields a freshly palloc'ed, header-only value that holds no entries.
+ */
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty=(tsstat*)palloc(STATHDRSIZE);
+   empty->size=0;
+   empty->len=STATHDRSIZE;
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+/*
+ * Output function for tsstat: not implemented, always raises
+ * ERROR "Unimplemented".  The PG_RETURN_NULL() after elog(ERROR) is not
+ * expected to be reached.
+ */
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * Grow (or create) an array of WordEntry pointers.
+ *
+ * in  - current array, or NULL when nothing has been allocated yet
+ * len - in/out: current capacity in elements; updated to the new capacity
+ *
+ * An existing array is doubled with repalloc(); otherwise a new array of
+ * 8 elements is palloc'ed.  Returns the (possibly moved) array.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*) * (*len) );
+   }
+
+   *len=8;
+   return palloc( sizeof(WordEntry*) * (*len) );
+}
+
+/*
+ * Order a StatEntry of 'stat' against a WordEntry of 'txt'.
+ * Shorter words sort first; words of equal length are ordered by strncmp()
+ * over their bytes.  Returns <0, 0 or >0.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   char *aword, *bword;
+
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   aword = STATSTRPTR(stat) + a->pos;
+   bword = STRPTR(txt) + b->pos;
+   return strncmp(aword, bword, a->len);
+}
+
+/*
+ * Build a new tsstat holding every entry of 'stat' plus 'len' new entries
+ * taken from the tsvector 'txt'.
+ *
+ * stat  - existing statistics; only read here, never freed
+ * txt   - tsvector the new words point into
+ * entry - array of 'len' pointers to WordEntry items of txt to be added
+ * len   - number of elements in entry
+ *
+ * Returns a freshly palloc'ed tsstat.  Its string area is the string area of
+ * stat followed by the bytes of the new words, so ->pos of the copied old
+ * entries stays valid without adjustment.
+ *
+ * NOTE(review): the merge relies on stat's entries and entry[] both being
+ * sorted in compareStatWord() order and on no word of entry[] being present
+ * in stat already -- confirm against the caller.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* bytes needed for the words being added */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings first; new words are appended starting at curptr */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary search for its insertion point */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       /* entries before the insertion point, the new entry, then the rest */
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge the two sorted sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries (possibly nothing) */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* whatever is left of the new words (possibly nothing) */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+/*
+ * ts_accum(tsstat, tsvector): fold the words of one tsvector into the
+ * running statistics.
+ *
+ * Words already present in stat have ndoc incremented and nentry increased
+ * by their number of positions (counted as 1 when the word has none); these
+ * updates are made in place in stat.  Words not yet present are collected
+ * and handed to formstat(), which builds a new, larger tsstat.
+ *
+ * Returns stat itself when no new words were found (or txt is empty),
+ * otherwise the new tsstat.  The old stat is not freed here (the pfree is
+ * commented out below).
+ *
+ * NOTE(review): txt is detoasted in its declaration, before PG_ARGISNULL(1)
+ * is tested -- a NULL second argument would reach PG_DETOAST_DATUM first.
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both arrays in step when stat is smaller than
+    * 100 times the tsvector, otherwise binary-search stat for each word.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word sorts before the current stat entry: it is new */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* stat exhausted: every remaining word is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   /* leaves StopLow < StopHigh, which marks "found" below */
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   /* build the enlarged statistics while txt is still available */
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions in this file.
+ * cur  - index of the next StatEntry to be returned
+ * stat - private copy of the statistics being returned.
+ * NOTE(review): stat is declared as tsvector* although ts_setup_firstcall()
+ * stores a copy of a tsstat in it; the code only reaches it through ->size
+ * and the STAT* macros -- presumably both structs share the same leading
+ * len/size fields; confirm against tsvector.h.
+ */
+typedef struct {
+   uint32  cur;
+   tsvector *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions of this file.
+ *
+ * While switched to funcctx->multi_call_memory_ctx it allocates a
+ * StatStorage, copies stat (stat->len bytes) into it, stores it in
+ * funcctx->user_fctx, and builds the slot and attinmeta from the tuple
+ * descriptor of the relation/type named "statinfo".  The previous memory
+ * context is restored before returning.  The caller's stat is left untouched.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   StatStorage     *st;
+   
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   st=palloc( sizeof(StatStorage) );
+   st->cur=0;
+   st->stat=palloc( stat->len );
+   memcpy(st->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+
+/*
+ * Produce the next result row for the set-returning functions.
+ *
+ * Reads the StatStorage kept in funcctx->user_fctx.  While entries remain,
+ * builds a (word, ndoc, nentry) tuple from C strings for the current entry,
+ * advances the cursor and returns the tuple as a Datum.  Once all entries
+ * have been returned it frees the storage and returns (Datum)0.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *storage=(StatStorage*)funcctx->user_fctx;
+   StatEntry   *ent;
+   HeapTuple   tup;
+   Datum   out;
+   char    *cols[3];
+   char    ndocbuf[16];
+   char    nentrybuf[16];
+
+   /* everything returned already: drop the private copy, signal the end */
+   if ( storage->cur >= storage->stat->size ) {
+       pfree(storage->stat);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   ent=STATPTR(storage->stat) + storage->cur;
+
+   /* columns 1 and 2: the counters rendered as decimal text */
+   sprintf(ndocbuf,"%d",ent->ndoc);
+   cols[1]=ndocbuf;
+   sprintf(nentrybuf,"%d",ent->nentry);
+   cols[2]=nentrybuf;
+
+   /* column 0: the word bytes copied out as a NUL-terminated string */
+   cols[0]=palloc( ent->len+1 );
+   memcpy( cols[0], STATSTRPTR(storage->stat)+ent->pos, ent->len);
+   cols[0][ent->len]='\0';
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, cols);
+   out = TupleGetDatum(funcctx->slot, tup);
+
+   pfree(cols[0]);
+   storage->cur++;
+   return out;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+/*
+ * ts_accum_finish(tsstat): set-returning function that returns the entries
+ * of an accumulated tsstat one row per call.  The first call copies the
+ * argument via ts_setup_firstcall(); every call then asks
+ * ts_process_call() for the next row and finishes when it yields (Datum)0.
+ */
+Datum
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       ctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(ctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(ctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+/* cached oid of the tsvector type; InvalidOid until get_ti_Oid() has run */
+static Oid tiOid=InvalidOid;
+/*
+ * Look up the oid of the type named 'tsvector' in pg_type through SPI and
+ * store it in tiOid.
+ *
+ * Raises ERROR when SPI_exec fails, when the query returns no row, or when
+ * the fetched oid is InvalidOid.
+ * NOTE(review): uses SPI_exec directly, so the caller is expected to have an
+ * open SPI connection -- confirm at the call sites.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * A row count is never negative, so the former "<0" test could not fire
+    * and an empty result went on to read vals[0]; test for "no rows" instead.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL query given in txt and accumulate statistics over its result.
+ *
+ * The query must return exactly one column of type tsvector; otherwise an
+ * ERROR is raised.  Rows are fetched through an SPI cursor in batches of 100
+ * and every non-null value is folded into the statistics with ts_accum().
+ *
+ * Returns the accumulated tsstat (a header-only tsstat when the query
+ * returns no non-null values).
+ * NOTE(review): uses SPI calls without connecting, so the caller is expected
+ * to have called SPI_connect() -- confirm at the call sites.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   /* validate the result shape once, on the first batch */
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum may return its input; free only a replaced one */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+/*
+ * stat(text): set-returning function.  On the first call the text argument
+ * is run as a query inside an SPI connection by ts_stat_sql() and the
+ * resulting statistics are copied for later calls by ts_setup_firstcall().
+ * Every call then returns the next row from ts_process_call() until it
+ * yields (Datum)0.
+ */
+Datum
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *sql=PG_GETARG_TEXT_P(0);
+       tsstat  *collected;
+
+       ctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       collected = ts_stat_sql(sql);
+       PG_FREE_IF_COPY(sql,0);
+       ts_setup_firstcall(ctx, collected);
+       SPI_finish();
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(ctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one word.
+ * len    - length of the word in bytes
+ * pos    - offset of the word from the start of the string area (STATSTRPTR)
+ * ndoc   - counter incremented once per tsvector containing the word
+ * nentry - counter increased by the word's number of positions per tsvector
+ * NOTE(review): the ndoc/nentry descriptions follow how ts_stat.c updates
+ * them -- confirm there.
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Variable-length statistics container.
+ * len  - total size of the value in bytes (see CALCSTATSIZE)
+ * size - number of StatEntry items
+ * data - start of the payload: 'size' StatEntry items (STATPTR) followed by
+ *        the bytes of the words (STATSTRPTR)
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat.  Arguments are parenthesized so expressions can
+ * be passed safely, and the header fields are read through tsstat (declared
+ * above) so this header does not depend on another type being declared first.
+ *
+ * STATHDRSIZE          - bytes occupied by the len and size fields
+ * CALCSTATSIZE(x, l)   - total bytes for x entries plus l bytes of words
+ * STATPTR(x)           - first StatEntry of the tsstat at x
+ * STATSTRPTR(x)        - start of the word bytes, right after the entries
+ * STATSTRSIZE(x)       - number of word bytes stored in the tsstat at x
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+-- Everything below runs in one transaction, closed by END at the bottom.
+BEGIN;
+
+--dict conf
+-- One row per dictionary.  dict_init and dict_lexize hold pg_proc oids of
+-- the dictionary's functions (see the inserts below); dict_initoption is a
+-- free-form text option and may be null.
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+-- lexize() comes in three forms: dictionary given by oid, dictionary given
+-- by name (C symbol lexize_byname), and no dictionary argument (C symbol
+-- lexize_bycurrent).  All return a text array.
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+-- set_curdict(): by integer id or by name.
+-- NOTE(review): presumably selects the dictionary used by lexize(text) --
+-- confirm in the C source.
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+-- Each dictionary is an init function taking text plus a lexize function,
+-- registered in pg_ts_dict by looking the functions up in pg_proc by name
+-- only (no schema or argument check).
+-- 'simple' dictionary: no init option.
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+-- 'en_stem' dictionary: init option is a stop-word file path; DATA_PATH is
+-- a placeholder substituted by the Makefile's sed rule.
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+-- 'ru_stem' dictionary: own init function, shares snb_lexize with en_stem.
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+-- 'ispell_template' dictionary: registered without an init option; see the
+-- commented example at the end of this script for setting one.
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+-- 'synonym' dictionary: registered without an init option.
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+-- One row per parser; the five oid columns hold pg_proc oids of the
+-- parser's functions (filled by name lookup in the insert further below).
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+-- Row type returned by token_type(): token id, alias and description.
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+-- token_type(): parser given by int4 id, by name (token_type_byname), or
+-- not given at all (token_type_current).
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+-- set_curprs(): by integer id or by name.
+-- NOTE(review): presumably selects the parser used by the one-argument
+-- forms above and below -- confirm in the C source.
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+-- Row type returned by parse(): token id and token text.
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+-- parse(): parser given by oid, by name (parse_byname), or not given
+-- (parse_current); the last argument is the text to parse.
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+-- The five functions of the built-in parser, then its registration in
+-- pg_ts_parser.  None of them is declared strict.
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+-- Values follow the column order of pg_ts_parser: name, start, nexttoken,
+-- end, headline, lextype, comment.
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+-- One row per configuration: its name, the parser it uses (by name) and an
+-- optional locale.
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+-- For each (configuration, token alias) pair, the array of dictionary
+-- names to apply.
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+-- set_curcfg(): by integer id or by name; show_curcfg() returns an oid.
+-- NOTE(review): presumably these set / report the configuration used by the
+-- one-argument forms of to_tsvector, to_tsquery and headline -- confirm.
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+-- Three predefined configurations, all using the 'default' parser; 'simple'
+-- is inserted without a locale.
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+-- I/O functions first, then the variable-length type built on them.
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+-- length(tsvector) is implemented by the C symbol tsvector_length.
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+-- to_tsvector(): configuration given by oid, by name (to_tsvector_name), or
+-- not given (to_tsvector_current); the last argument is the text.
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+-- tsvector -> tsvector helpers.
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+-- tsvector || tsvector is concat().
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+-- I/O functions first, then the variable-length type built on them.
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+-- querytree(tsquery) is implemented by the C symbol tsquerytree.
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+-- to_tsquery(): configuration given by oid, by name (to_tsquery_name), or
+-- not given (to_tsquery_current); the last argument is the query text.
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+-- Match functions behind the @@ operator, one per argument order.
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+-- @@ is defined for both (tsvector, tsquery) and (tsquery, tsvector); each
+-- definition names @@ as its commutator.
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+-- Trigger function tsearch2(); it is only created here, this script does
+-- not attach it to any table.
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+-- rank(): forms with an explicit float4[] first argument use the C symbol
+-- rank; forms without it use rank_def.  Each has a variant with a trailing
+-- int4 argument.
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+-- rank_cd(): forms with an explicit int4 first argument use the C symbol
+-- rank_cd; forms without it use rank_cd_def.
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+-- headline(): configuration given by oid (headline), by name
+-- (headline_byname), or not given (headline_current).  Each has a variant
+-- with a trailing text argument; both variants map to the same C symbol.
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+-- gtsvector is the type stored in the index (see STORAGE in the operator
+-- class below); it gets its own I/O functions.
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+-- The seven functions referenced as FUNCTION 1..7 by the operator class.
+-- Only gtsvector_penalty is declared strict.
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+-- Default GiST operator class for tsvector: indexes the @@ (tsvector,
+-- tsquery) operator, marked RECHECK, and stores keys as gtsvector.
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+-- Row type returned by stat(): the word, then two int4 counters.
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+-- The aggregate-based interface below (tsstat type, ts_accum,
+-- ts_accum_finish, aggregate stat) is disabled; only stat(text) after it
+-- is actually created.
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+-- stat(text) is implemented by the C symbol ts_stat; the text argument is
+-- a query returning a single tsvector column.
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+-- The examples below select the row by dict_name, the key of pg_ts_dict
+-- (the table has no dict_id column).
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+-- Closes the transaction opened by BEGIN at the top of the script.
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of each lexeme's position in the string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>            /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending position.
+ *
+ * Returns 0 for equal positions: qsort() requires a consistent ordering,
+ * i.e. cmp(a,b) and cmp(b,a) must not both report "greater".
+ */
+static int
+comparePos(const void *a, const void *b) {
+   int pa = ((const WordEntryPos *) a)->pos;
+   int pb = ((const WordEntryPos *) b)->pos;
+
+   if ( pa == pb )
+       return 0;
+   return ( pa > pb ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ * For duplicate positions the largest weight is kept.  Compaction stops
+ * once MAXNUMPOS-1 slots are filled or position MAXENTRYPOS-1 is stored.
+ * Returns the number of positions kept.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *last = a;     /* last slot of the compacted prefix */
+   WordEntryPos *scan;
+
+   if (l == 1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (scan = a + 1; scan - a < l; scan++) {
+       if ( scan->pos == last->pos ) {
+           /* same position: remember only the heavier weight */
+           if ( scan->weight > last->weight )
+               last->weight = scan->weight;
+           continue;
+       }
+       last++;
+       last->pos = scan->pos;
+       last->weight = scan->weight;
+       if ( last - a >= MAXNUMPOS-1 || last->pos == MAXENTRYPOS-1 )
+           break;
+   }
+   return last + 1 - a;
+}
+
+/* Lexeme text buffer consulted by compareentry(); set before calling qsort() */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN: shorter lexemes sort first, lexemes
+ * of equal length are ordered by their bytes in BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *ea = (const WordEntryIN *) a;
+   const WordEntryIN *eb = (const WordEntryIN *) b;
+
+   if ( ea->entry.len != eb->entry.len )
+       return ( ea->entry.len > eb->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[ea->entry.pos],
+                  &BufferStr[eb->entry.pos],
+                  ea->entry.len);
+}
+
+/*
+ * Sort the l parsed entries in a[] and merge entries holding the same
+ * lexeme (their position lists are concatenated, then de-duplicated with
+ * uniquePos()).
+ *
+ * buf       - buffer holding the lexeme text referenced by entry.pos
+ * outbuflen - output: exact number of bytes the caller needs for the data
+ *             area, i.e. for every kept entry SHORTALIGN(len) plus, when it
+ *             has positions, (count + 1) WordEntryPos slots (one slot holds
+ *             the count).  The value is always overwritten; it no longer
+ *             accumulates onto whatever the caller passed in.
+ *
+ * Returns the number of distinct entries left at the front of a[].
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+   int4        buflen = 0;
+
+   if (l < 1) {
+       /* nothing to do; do not touch a[0] */
+       *outbuflen = 0;
+       return 0;
+   }
+
+   res = a;
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           buflen += (*(uint16*)(a->pos) + 1) * sizeof(WordEntryPos);
+       }
+       *outbuflen = buflen + SHORTALIGN(a->entry.len);
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* new lexeme: close the current group and account for its size */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               buflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           buflen += SHORTALIGN(res->entry.len);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* same lexeme again: fold its positions into the group */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]),
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* close the last group */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       buflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   buflen += SHORTALIGN(res->entry.len);
+
+   *outbuflen = buflen;
+   return res + 1 - a;
+}
+
+/* States of the gettoken_tsvector() scanner */
+#define WAITWORD   1       /* skipping spaces, looking for the start of a word */
+#define WAITENDWORD 2      /* inside an unquoted word */
+#define WAITNEXTCHAR   3   /* previous character was a backslash */
+#define WAITENDCMPLX   4   /* inside a quoted ('...') word */
+#define WAITPOSINFO    5   /* after the closing quote: ':' starts position info */
+#define INPOSINFO  6       /* expecting the first digit of a position */
+#define WAITPOSDELIM   7   /* after a position: ',', a weight letter or the end */
+
+/*
+ * Double state->word when there is no room for one more character,
+ * keeping state->curpos at the same offset inside the (possibly moved) buffer.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Read the next lexeme, with its optional ":pos[weight],..." list, from
+ * state->prsbuf.
+ *
+ * The lexeme is collected NUL-terminated in state->word (grown on demand);
+ * on return state->curpos points at the terminator.  If position info was
+ * read, state->alen is non-zero and state->pos holds the list, with the
+ * number of positions stored in the first uint16 of the array.
+ *
+ * Returns 1 when a lexeme was read and 0 at end of input; malformed input
+ * is reported with elog(ERROR).
+ *
+ * The <ctype.h> classifiers are called with an unsigned char value: passing
+ * a plain (possibly negative) char is undefined for bytes >= 0x80.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;   /* state to resume after an escaped character */
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               /* take the escaped character literally */
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1;
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               /* slot 0 of state->pos holds the count, so keep one spare slot */
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2;
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+           /* remaining digits of the current position are skipped here */
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type: parse the cstring argument into a
+ * tsvector.
+ *
+ * Lexemes are collected with gettoken_tsvector() into a scratch buffer,
+ * de-duplicated by uniqueentry(), and then packed as: WordEntry array,
+ * followed by the data area holding each lexeme (SHORTALIGN-padded) and,
+ * for entries with positions, the position list with its count slot.
+ *
+ * Raises ERROR for a lexeme of MAXSTRLEN bytes or more, or when a lexeme
+ * offset does not fit in WordEntry.pos.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /* WordEntry.pos is 20 bits wide: MAXSTRPOS itself would wrap to 0 */
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* no lexemes: do not size the value by the scratch buffer */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* copy the count slot together with the positions */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos );
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * Return the number of entries (the size field) of the tsvector argument.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type: render the value as
+ * 'lexeme':pos[weight],... entries separated by single spaces.
+ * A quote inside a lexeme is preceded by a backslash; weights 3/2/1 are
+ * printed as A/B/C and weight 0 is not printed.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+       /* size estimate: quotes and separators, twice the text, 7 bytes per position */
+       lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+       for (i = 0; i < out->size; i++) {
+               lenbuf += ptr[i].len*2 /*for escape */;
+               if ( ptr[i].haspos )
+                       lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+       }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'')
+           {
+               /* grow by one byte for the backslash; repalloc may move the buffer */
+               int4        pos = curout - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               /* advance to the terminator sprintf just wrote */
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD: shorter words first, then by bytes, then by
+ * ascending position.  Returns 0 when word and position are both equal so
+ * that the ordering is consistent, as qsort() requires.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+
+   if (wa->len == wb->len) {
+       int res = strncmp(wa->word, wb->word, wb->len);
+
+       if ( res==0 ) {
+           if ( wa->pos.pos == wb->pos.pos )
+               return 0;
+           return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+       }
+       return res;
+   }
+   return (wa->len > wb->len) ? 1 : -1;
+}
+
+/*
+ * Sort the l words in a[] and collapse repeated words in place.
+ * Each kept word gets a palloc'd position array in pos.apos, where
+ * apos[0] is the number of positions that follow.  The text of a repeated
+ * word is freed; its position is appended unless the list already holds
+ * MAXNUMPOS-1 positions or ends with MAXENTRYPOS-1.
+ * Returns the number of distinct words.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *scan,
+              *last;
+   int         firstpos;
+
+   if (l != 1)
+       qsort((void *) a, l, sizeof(WORD), compareWORD);
+
+   /* the first word always opens a group; read pos.pos before apos replaces it */
+   firstpos = LIMITPOS(a->pos.pos);
+   a->alen = 2;
+   a->pos.apos = (uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0] = 1;
+   a->pos.apos[1] = firstpos;
+
+   if (l == 1)
+       return l;
+
+   last = a;
+   for (scan = a + 1; scan - a < l; scan++)
+   {
+       if (scan->len == last->len &&
+           strncmp(scan->word, last->word, last->len) == 0)
+       {
+           /* repeated word: keep only its position, if there is room */
+           pfree(scan->word);
+           if ( last->pos.apos[0] < MAXNUMPOS-1 && last->pos.apos[ last->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               if ( last->pos.apos[0]+1 >= last->alen ) {
+                   last->alen *= 2;
+                   last->pos.apos = (uint16*)repalloc( last->pos.apos, sizeof(uint16)*last->alen );
+               }
+               last->pos.apos[ last->pos.apos[0]+1 ] = LIMITPOS(scan->pos.pos);
+               last->pos.apos[0]++;
+           }
+       }
+       else
+       {
+           /* new word: open the next group */
+           last++;
+           last->len = scan->len;
+           last->word = scan->word;
+           firstpos = LIMITPOS(scan->pos.pos);
+           last->alen = 2;
+           last->pos.apos = (uint16*)palloc( sizeof(uint16)*last->alen );
+           last->pos.apos[0] = 1;
+           last->pos.apos[1] = firstpos;
+       }
+   }
+
+   return last + 1 - a;
+}
+
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are de-duplicated with uniqueWORD(), then packed as a WordEntry
+ * array followed by the data area (each word SHORTALIGN-padded, followed by
+ * a uint16 count and its positions, all stored with weight 0).
+ * Frees every word, every position array and prs->words itself.
+ * Raises ERROR when a word offset does not fit in WordEntry.pos.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /* WordEntry.pos is 20 bits wide: MAXSTRPOS itself would wrap to 0 */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+
+           ptr->haspos=1;
+           /* count slot first, then the positions */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text): parse the text with the configuration looked
+ * up by findcfg() and return the resulting tsvector; text that yields no
+ * words gives an empty tsvector.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *src = PG_GETARG_TEXT_P(1);
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     prs;
+   tsvector   *result = NULL;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(src), VARSIZE(src) - VARHDRSZ);
+   PG_FREE_IF_COPY(src, 1);
+
+   if (prs.curwords == 0) {
+       /* nothing parsed: header-only value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   } else
+       result = makevalue(&prs);
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg_name, text): translate the configuration name with
+ * name2id_cfg() and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       vec;
+
+   vec = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(vec);
+}
+
+/*
+ * to_tsvector(text): delegate to to_tsvector() with the configuration id
+ * returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       vec;
+
+   vec = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+   PG_RETURN_DATUM(vec);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is text.
+ * Returns the OID of the first such candidate, or InvalidOid when there is
+ * none.  The candidate list is freed.
+ */
+static Oid
+findFunc(char *fname) {
+   List *names = makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(names, 1);
+   Oid found = InvalidOid;
+
+   freeList(names);
+
+   while ( cand != NULL ) {
+       FuncCandidateList next = cand->next;
+
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * Row-level BEFORE INSERT/UPDATE trigger, declared as
+ * tsearch2(tsvector_field, text_field1, ...).  It parses the listed text
+ * columns with the current configuration and stores the resulting tsvector
+ * in tsvector_field of the tuple being written.
+ *
+ * An argument that is not a column name is looked up with findFunc(); once
+ * found, that function is applied to the value of every column processed
+ * after it (funcoid is not reset inside the loop).
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* first argument names the destination column */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: must be the name of a text function */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /* result was not toasted: make the pfree test below skip it */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free only a detoasted copy */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a header-only tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Descriptor of one lexeme, packed into 32 bits.
+ */
+typedef struct {
+   uint32
+       haspos:1,   /* set when a position list is stored for the lexeme */
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* limits following from the widths of the len and pos bit fields */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One position of a lexeme, packed into 16 bits.
+ */
+typedef struct {
+   uint16
+       weight:2,   /* 0..3; printed as nothing/C/B/A by tsvector_out */
+       pos:14;     /* position value, at most MAXENTRYPOS-1 */
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+/* clamp x to the largest value that fits in WordEntryPos.pos */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * The tsvector value: a two-int4 header followed by the WordEntry array
+ * and the data area (see ARRPTR and STRPTR).
+ */
+typedef struct
+{
+   int4        len;    /* total size of the value in bytes */
+   int4        size;   /* number of WordEntry items */
+   char        data[1];    /* start of the variable-length part */
+}  tsvector;
+
+/*
+ * Layout helpers for tsvector.  Every macro argument is parenthesized so
+ * that expression arguments (e.g. CALCDATASIZE(n, cur-data)) are evaluated
+ * as a unit.
+ *
+ * DATAHDRSIZE       size of the len/size header
+ * CALCDATASIZE(x,l) total bytes for x entries and l bytes of data area
+ * ARRPTR(x)         the WordEntry array
+ * STRPTR(x)         the data area, which follows the WordEntry array
+ * STRSIZE(x)        size of the data area
+ * _POSDATAPTR(x,e)  count slot of entry e's position list
+ * POSDATALEN(x,e)   number of positions of entry e (0 without positions)
+ * POSDATAPTR(x,e)   first WordEntryPos of entry e
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Entry as collected while parsing input in tsvector_in().
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;   /* position list; the first uint16 holds the count */
+}  WordEntryIN;
+
+/*
+ * Scanner state for gettoken_tsvector().
+ */
+typedef struct
+{
+   char       *prsbuf;     /* current read position in the input string */
+   char       *word;       /* buffer receiving the current lexeme */
+   char       *curpos;     /* write position inside word */
+   int4        len;        /* allocated size of word */
+   int4        state;      /* current scanner state */
+   int4        alen;       /* allocated slots in pos; 0 when no positions were read */
+   WordEntryPos    *pos;   /* positions of the current lexeme; first uint16 is the count */
+   bool        oprisdelim; /* when true, operator characters end a word */
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>            /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the argument without position info.
+ * Only the lexemes are copied; haspos is cleared on every entry.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* data area needs only the SHORTALIGN-padded lexemes */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char"): return a copy of the tsvector in which every
+ * position carries the given weight.  The weight letter is case-insensitive:
+ * a=3, b=2, c=1, d=0; any other letter raises ERROR.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* tolower() needs an unsigned char value; a negative char is undefined */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two entries whose lexeme text lives in ptra and ptrb: shorter
+ * lexemes sort first, equal lengths are ordered by their bytes.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (in src) to the position list of destptr
+ * (in dest), shifting each position by maxpos and clamping it with LIMITPOS.
+ * If destptr has no positions yet, its count slot is initialised to 0.
+ * Sets destptr->haspos when at least one position was added.
+ * Returns the number of positions appended.
+ *
+ * NOTE(review): the loop header was truncated in this copy of the source;
+ * it is restored here using the same limits uniquePos() and uniqueWORD()
+ * apply (MAXNUMPOS entries, stop after MAXENTRYPOS-1) - confirm against the
+ * upstream file.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos )
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight;
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1;
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector): merge two tsvectors into a new one.
+ * Positions taken from the second argument are shifted by the largest
+ * position found in the first one (via add_pos).  Entries present in both
+ * inputs are emitted once, with both position lists.
+ * NOTE(review): the merge walks both entry arrays in step, so it presumably
+ * relies on them being sorted in compareEntry() order - confirm.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* allocate for the case with no shared lexemes; size and len are corrected at the end */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* in1 positions are copied verbatim, count slot included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* in2 positions are shifted by maxpos while being copied */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: emit it once */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* count slot already copied above, so only the added positions advance cur */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* remaining entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* remaining entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /* fewer entries than allocated for: STRPTR(out) moves, so move the data with it */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- WARNING: this script uninstalls tsearch2.
+-- The DROP ... CASCADE statements below also drop every index, trigger
+-- and table column that uses a type defined in tsearch2.sql.
+-- Everything runs inside the single transaction opened by BEGIN above.
+
+
+-- GiST operator class for tsvector
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregates
+DROP AGGREGATE stat(tsvector);
+
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types (CASCADE removes the objects that depend on them)
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- functions that are dropped explicitly
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the token
+ * type number from deflex.h (1..LASTNUM).  Entry 0 is an empty
+ * placeholder because token type numbers start at 1.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token type number
+ * from deflex.h (1..LASTNUM).  Entry 0 is an empty placeholder because
+ * token type numbers start at 1.  Must stay in the same order as
+ * lex_descr[].
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Remember: LASTNUM must always equal the highest token type number
+ * defined below, and lex_descr[] / tok_alias[] in deflex.c need one
+ * entry per token type (plus the unused entry 0).
+ */
+#define LASTNUM        23
+
+/* token type numbers; 0 is not used as a token type */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* per-token-type description and alias tables, defined in deflex.c */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+#include <stdio.h>     /* FILE, used by start_parse_fh() */
+
+/*
+ * Scanner output shared with the callers of tsearch2_yylex(): the text
+ * and the length of the token that was just recognised.
+ *
+ * `token` is defined (with an initializer) in parser.l, so the header
+ * only declares it; defining it here as well would create a second
+ * definition in every file that includes this header.
+ *
+ * NOTE(review): tokenlen has no other visible definition, so it is left
+ * as a tentative definition here.  It should become `extern` once a
+ * single definition is added to parser.l.
+ */
+extern char *token;
+int            tokenlen;
+
+/* returns the type of the next token, see deflex.h */
+int            tsearch2_yylex(void);
+
+/* scanner set-up and tear-down, implemented in parser.l */
+void       start_parse_str(char *, int);
+void       start_parse_fh(FILE *, int);
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/* Avoid exit() on fatal scanner errors */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* postgres allocation function */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* text of the token just recognised */
+char *s     = NULL;  /* private copy of the matched text, used to return a WHOLE hyphenated word (or URL) */
+
+YY_BUFFER_STATE buf = NULL; /* buffer currently being scanned; needed for parsing from a string */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the current read request issued by YY_INPUT */
+
+/*
+ * Redefine YY_INPUT so that reading from a filehandle can be limited to
+ * a given number of bytes.
+ *
+ * Interactive buffers are read character by character up to and
+ * including a newline.  Otherwise:
+ *   lrlimit == 0  - the limit is exhausted, report end of input;
+ *   lrlimit  > 0  - read at most lrlimit bytes and decrease the limit;
+ *   lrlimit  < 0  - unlimited, read max_size bytes.
+ * Note that the fread() and its error check are executed in both of the
+ * last two cases: despite the indentation they are not part of the
+ * inner "else".
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: release the saved copy of the last composite
+ * token and delete the scanner buffer.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Begin parsing the `limit` bytes starting at `str`.
+ * A parse that is still open is closed first.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL ) {
+       end_parse();
+   }
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   /* always start in the basic scanner state */
+   BEGIN INITIAL;
+}
+
+/*
+ * Begin parsing from filehandle `fh`, reading at most `limit` bytes.
+ * A limit of 0 means "no limit".  A parse that is still open is closed
+ * first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL ) {
+       end_parse();
+   }
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   /* always start in the basic scanner state */
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+/* saved SPI plan of the pg_ts_parser lookup, prepared on first use by init_prs() */
+static void *plan_getparser=NULL;
+/* parser selected with set_curprs(); InvalidOid until one has been chosen */
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser whose oid is `id`,
+ * read from pg_ts_parser through SPI.
+ *
+ * *prs is zeroed first.  The start, nexttoken, end and headline
+ * functions are resolved into FmgrInfo entries allocated in
+ * TopMemoryContext; the oid of the lextype function is stored as is.
+ * Failures to prepare or execute the query, and a missing row, are
+ * reported through ts_error(ERROR, ...).
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       /* the plan is prepared once and kept for all later calls */
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+
+       /* columns are numbered in the order of the select list above */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else
+       /* Oid is an unsigned type, so it is printed with %u */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/*
+ * Cache of the parser descriptions loaded so far.
+ */
+typedef struct {
+   /* entry returned by the latest successful lookup, checked first by findprs() */
+   WParserInfo *last_prs;
+   /* number of entries of list[] in use */
+   int     len;
+   /* number of entries list[] has room for */
+   int     reallen;
+   /* loaded parsers, kept sorted by prs_id so that bsearch() can be used */
+   WParserInfo *list;
+   /* parser name -> oid cache used by name2id_prs() */
+   SNMap       name2id_map;
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget every cached parser: free the name->id map and the parser
+ * list, then return PList to its initial all-zero state.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL ) {
+       free(PList.list);
+   }
+
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is
+ * an unsigned type, so the difference of two ids converted to int has
+ * the wrong sign whenever the ids are more than INT_MAX apart, which
+ * would give qsort() and bsearch() an inconsistent ordering.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached description of parser `id`, loading it with
+ * init_prs() on first use.
+ *
+ * Lookup order: the entry returned by the previous call, then a binary
+ * search of the sorted list, and finally a fresh load that is appended
+ * to the list and sorted into place.
+ *
+ * PList.last_prs is only ever set to a fully initialised entry.  It is
+ * cleared before the list is grown, because realloc() may move the
+ * list and init_prs() may leave through an error; in either case a
+ * stale last_prs would point at freed or half-initialised memory.
+ */
+WParserInfo *
+findprs(Oid id) {
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       WParserInfo key;
+       WParserInfo *found;
+
+       key.prs_id=id;
+       found = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( found != NULL ) {
+           PList.last_prs = found;
+           return found;
+       }
+   }
+
+   /* the list is about to change: do not keep a pointer into it */
+   PList.last_prs = NULL;
+
+   /* last chance */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp )
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /* the new entry is counted only after init_prs() has succeeded */
+   init_prs(id, &(PList.list[PList.len]));
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return findprs(id); /* qsort changed order!! */
+}
+
+/* saved SPI plan of the name -> oid lookup, prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ * Answers are remembered in PList.name2id_map, so the table is only
+ * queried the first time a name is seen.  An unknown name is reported
+ * through ts_error(ERROR, ...).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid     argtypes[1] = { TEXTOID };
+   Datum   argvalues[1] = { PointerGetDatum(name) };
+   bool    isnull;
+   int     rc;
+   Oid     prsid;
+
+   /* fast path: name already resolved earlier */
+   prsid = findSNMap_t( &(PList.name2id_map), name );
+   if ( prsid != InvalidOid )
+       return prsid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, argvalues, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed == 0 )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       prsid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(PList.name2id_map), name, prsid );
+   return prsid;
+}
+
+
+/******sql-level interface******/
+/*
+ * State kept between the calls of the token_type*() set-returning
+ * functions.
+ */
+typedef struct {
+   /* index of the next entry of list[] to return */
+   int     cur;
+   /* array returned by the parser's lextype function; an entry with lexid 0 ends it */
+   LexDescr    *list;
+} TypeStorage;
+
+/*
+ * First-call set-up shared by the token_type*() functions: ask parser
+ * `prsid` for its list of token types and prepare the "tokentype" tuple
+ * machinery.  Everything that must survive until the last row is
+ * allocated in the multi-call memory context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo    *parser = findprs(prsid);
+   MemoryContext   saved;
+   TypeStorage    *state;
+   TupleDesc       desc;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   state->cur = 0;
+   state->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) )
+   );
+   funcctx->user_fctx = (void*)state;
+
+   desc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+/*
+ * Per-call worker shared by the token_type*() functions.
+ * Returns the next (id, alias, description) row as a Datum, or
+ * (Datum)0 once the list is exhausted, in which case the state is freed.
+ * The alias and description strings are freed as soon as they have been
+ * copied into the tuple.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *state = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char        *fields[3];
+   char         idbuf[16];
+   HeapTuple    tup;
+   Datum        row;
+
+   /* no list at all, or terminating entry reached: clean up */
+   if ( state->list == NULL || state->list[state->cur].lexid == 0 ) {
+       if ( state->list != NULL )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   sprintf(idbuf, "%d", entry->lexid);
+   fields[0] = idbuf;
+   fields[1] = entry->alias;
+   fields[2] = entry->descr;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   row = TupleGetDatum(funcctx->slot, tup);
+
+   pfree(fields[1]);
+   pfree(fields[2]);
+   state->cur++;
+   return row;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: the token types of the parser whose oid is
+ * given as argument 0.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call(funcctx);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: the token types of the parser whose name is
+ * given as argument 0.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( prsname ) );
+       PG_FREE_IF_COPY(prsname,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call(funcctx);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: the token types of the current parser.
+ * When no parser has been selected yet, the one named "default"
+ * becomes the current parser.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call(funcctx);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+
+/*
+ * Make the parser whose oid is argument 0 the current parser.
+ * The parser is looked up first, so the current parser is left
+ * unchanged if that lookup reports an error.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid prsid = PG_GETARG_OID(0);
+
+   findprs(prsid);
+   current_parser_id = prsid;
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Make the parser whose name is argument 0 the current parser:
+ * resolve the name and hand the oid over to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+   text *prsname = PG_GETARG_TEXT_P(0);
+   Oid   prsid = name2id_prs(prsname);
+
+   DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+
+   PG_FREE_IF_COPY(prsname, 0);
+   PG_RETURN_VOID();
+}
+
+/* one parsed token: its type number and a NUL-terminated copy of its text */
+typedef struct {
+   int type;
+   char    *lexem;
+} LexemEntry;
+
+/*
+ * State kept between the calls of the parse*() set-returning functions.
+ * While the text is being parsed, cur counts the stored tokens and len
+ * is the allocated size of list[]; afterwards len is the number of
+ * tokens and cur the index of the next one to return.
+ */
+typedef struct {
+   int cur;
+   int len;
+   LexemEntry  *list;
+} PrsStorage;
+   
+
+/*
+ * First-call set-up shared by the parse*() functions: run parser
+ * `prsid` over the whole text, store a copy of every token in the
+ * multi-call memory context and prepare the "tokenout" tuple machinery.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, int prsid, text *txt) {
+   WParserInfo    *parser = findprs(prsid);
+   MemoryContext   saved;
+   PrsStorage     *state;
+   TupleDesc       desc;
+   char           *lexeme = NULL;
+   int             lexlen = 0;
+   int             toktype = 0;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = (PrsStorage*)palloc( sizeof(PrsStorage) );
+   state->cur = 0;
+   state->len = 16;
+   state->list = (LexemEntry*)palloc( sizeof(LexemEntry)*state->len );
+
+   /* start the parser on the payload of the text value */
+   parser->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(parser->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* collect tokens until the parser returns type 0 */
+   for (;;) {
+       LexemEntry *slot;
+
+       toktype = DatumGetInt32(FunctionCall3(
+           &(parser->getlexeme_info),
+           PointerGetDatum(parser->prs),
+           PointerGetDatum(&lexeme),
+           PointerGetDatum(&lexlen)));
+       if ( toktype == 0 )
+           break;
+
+       /* grow the array by doubling when it is full */
+       if ( state->cur >= state->len ) {
+           state->len = 2*state->len;
+           state->list = (LexemEntry*)repalloc(state->list, sizeof(LexemEntry)*state->len);
+       }
+
+       slot = &(state->list[state->cur]);
+       slot->lexem = palloc(lexlen+1);
+       memcpy( slot->lexem, lexeme, lexlen);
+       slot->lexem[lexlen] = '\0';
+       slot->type = toktype;
+       state->cur++;
+   }
+
+   FunctionCall1(
+       &(parser->end_info),
+       PointerGetDatum(parser->prs)
+   );
+
+   /* switch from "filling" to "returning" mode */
+   state->len = state->cur;
+   state->cur = 0;
+
+   funcctx->user_fctx = (void*)state;
+   desc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+/*
+ * Per-call worker shared by the parse*() functions.
+ * Returns the next (type, text) row as a Datum, or (Datum)0 once all
+ * tokens have been returned, in which case the state is freed.  The
+ * token text is freed as soon as it has been copied into the tuple.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *state = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char        *fields[2];
+   char         typebuf[16];
+   HeapTuple    tup;
+   Datum        row;
+
+   /* nothing left: clean up */
+   if ( state->cur >= state->len ) {
+       if ( state->list != NULL )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   sprintf(typebuf, "%d", entry->type);
+   fields[0] = typebuf;
+   fields[1] = entry->lexem;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   row = TupleGetDatum(funcctx->slot, tup);
+
+   pfree(fields[1]);
+   state->cur++;
+   return row;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: the tokens of the text in argument 1, as
+ * produced by the parser whose oid is argument 0.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0), input );
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(funcctx);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: the tokens of the text in argument 1, as
+ * produced by the parser whose name is argument 0.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0);
+       text *input = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( prsname ), input );
+       PG_FREE_IF_COPY(prsname,0);
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(funcctx);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: the tokens of the text in argument 0, as
+ * produced by the current parser.  When no parser has been selected
+ * yet, the one named "default" becomes the current parser.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       prs_setup_firstcall(funcctx, current_parser_id, input );
+       PG_FREE_IF_COPY(input,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(funcctx);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * headline(cfg oid, text, query [, options text]) - build a headline of
+ * the text for the given query.
+ *
+ * The text is split with hlparsetext() using configuration `cfg`, the
+ * headline function of that configuration's parser is then called on
+ * the parsed words, and genhl() produces the resulting text value.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   /* options are optional: absent argument and NULL pointer both mean "none" */
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   /* let the parser's headline function process the words in prs */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /*
+    * NOTE(review): prs was zeroed above, so startsel/stopsel are only
+    * non-NULL if the headline function (or hlparsetext) set them -
+    * presumably it always does; confirm before relying on it.
+    */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Same as headline(), but the configuration is given by name
+ * (argument 0).  The options argument is optional; when it is absent a
+ * NULL pointer is passed on.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text   *cfgname = PG_GETARG_TEXT_P(0);
+   Datum   options;
+   Datum   result;
+
+   if ( PG_NARGS()>3 )
+       options = PG_GETARG_DATUM(3);
+   else
+       options = PointerGetDatum(NULL);
+
+   result = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfgname ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       options
+   );
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+
+/*
+ * Same as headline(), but using the current configuration, so the text
+ * is argument 0, the query argument 1 and the optional options
+ * argument 2.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum   options;
+   Datum   result;
+
+   if ( PG_NARGS()>2 )
+       options = PG_GETARG_DATUM(2);
+   else
+       options = PointerGetDatum(NULL);
+
+   result = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       options
+   );
+
+   PG_RETURN_DATUM(result);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/*
+ * Description of one parser, filled by init_prs() from its row in
+ * pg_ts_parser.
+ */
+typedef struct {
+   /* oid of the pg_ts_parser row */
+   Oid prs_id;
+   /* resolved prs_start function */
+   FmgrInfo start_info;
+   /* resolved prs_nexttoken function */
+   FmgrInfo getlexeme_info;
+   /* resolved prs_end function */
+   FmgrInfo end_info;
+   /* resolved prs_headline function */
+   FmgrInfo headline_info;
+   /* oid of the prs_lextype function (called by oid, not resolved) */
+   Oid lextype;
+   /* value returned by the start function, passed back to the other parser functions */
+   void *prs;
+} WParserInfo;
+
+/* load the description of parser `id` into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached description of parser `id`, loading it if needed */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* forget all cached parser descriptions */
+void   reset_prs(void);
+
+
+/*
+ * One token type as reported by a parser's lextype function.  The
+ * function returns an array of these, ended by an entry with lexid 0.
+ */
+typedef struct {
+   int lexid;
+   char    *alias;
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+/*
+ * prsd_lextype: list the token types known to the default parser.
+ *
+ * Returns a palloc'd array of LASTNUM+1 LexDescr entries.  The entry at
+ * index k holds token type k+1 together with pstrdup'd copies of
+ * tok_alias[k+1] and lex_descr[k+1].  The extra last entry only has its
+ * lexid set, to 0 (presumably the end-of-list marker for callers).
+ */
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *table=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   LexDescr *entry=table;
+   int tokid;
+
+   for(tokid=1;tokid<=LASTNUM;tokid++,entry++) {
+       entry->lexid = tokid;
+       entry->alias = pstrdup(tok_alias[tokid]);
+       entry->descr = pstrdup(lex_descr[tokid]);
+   }
+
+   /* entry now points at slot LASTNUM; alias and descr stay unset there */
+   entry->lexid=0;
+
+   PG_RETURN_POINTER(table);
+}
+
+/*
+ * prsd_start: begin parsing a buffer with the default parser.
+ *
+ * Argument 0 is a pointer to the text and argument 1 its int32 length; both
+ * are passed straight to start_parse_str().  Always returns a NULL pointer.
+ */
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char *buf=(char*)PG_GETARG_POINTER(0);
+   int32 buflen=PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+   PG_RETURN_POINTER(NULL);
+}
+
+/*
+ * prsd_getlexeme: fetch the next token from the default parser.
+ *
+ * Calls tsearch2_yylex() and then stores the parser's current 'token' and
+ * 'tokenlen' through the pointers given as arguments 1 and 2.  The value
+ * returned by tsearch2_yylex() is returned as the int32 token type.
+ * Argument 0 is not read.
+ */
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tokout=(char**)PG_GETARG_POINTER(1);
+   int *lenout=(int*)PG_GETARG_POINTER(2);
+   int toktype;
+
+   /* the lexer must run first: it is what updates token and tokenlen */
+   toktype=tsearch2_yylex();
+
+   *lenout = tokenlen;
+   *tokout = token;
+   PG_RETURN_INT32(toktype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end); /* prsd_end: finish a parse begun with prsd_start */
+Datum prsd_end(PG_FUNCTION_ARGS);
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* argument 0 is not read here (a ParserState* cast was left commented out) */
+   end_parse(); /* all the work is delegated to the parser's end_parse() */
+   PG_RETURN_VOID();
+}
+
+#define LEAVETOKEN(x)  ( (x)==12 ) /* token ids below are those listed by token_type('default'); 12 = blank */
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 ) /* url, hword, lhword, nlhword */
+#define ENDPUNCTOKEN(x)    ( (x)==12 ) /* blank */
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 ) /* tag, http, blank, entity */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 ) /* url, tag, hword, lhword, nlhword; prsd_headline sets 'replace' on these */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) ) /* blank plus the HLIDIGNORE set; not counted as words by prsd_headline */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) ) /* adds sfloat, version, float, int, uint; prsd_headline avoids ending a headline on these */
+
+/* A run of 'len' consecutive headline words beginning at 'words'. */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * checkcondition_HL: callback given to TS_execute() by hlCover().
+ *
+ * 'checkval' points to an hlCheck.  Returns true when at least one word in
+ * that run has its 'item' pointer equal to the query operand 'val', and
+ * false otherwise.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window=(hlCheck*)checkval;
+   int nwords=window->len;
+   int k;
+
+   for(k=0;k<nwords;k++) {
+       if ( window->words[k].item==val )
+           return true;
+   }
+   return false;
+}
+
+
+/*
+ * hlCover: find the next span of headline words that satisfies the query.
+ *
+ * On entry *p is the word index to start searching from.  On success the
+ * function returns true with *p and *q set to the first and last word index
+ * of the span.  It returns false when no further span exists.
+ *
+ * The span is found in two passes over the VAL items of the query:
+ *   1. for each operand, find its first occurrence at or after the start
+ *      position; *q becomes the largest of those indexes;
+ *   2. for each operand, find its last occurrence between the start
+ *      position and *q; *p becomes the smallest of those indexes.
+ * The words [*p, *q] are then checked with TS_execute(); if they do not
+ * satisfy the query the search restarts one word further on.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* pass 1: right edge of the span */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   /* *q still 0 is treated as "no operand found" */
+   if ( *q==0 )
+       return false;
+
+   /* pass 2: left edge of the span, scanning backwards from *q */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       /* evaluate the query against the words of the candidate span only */
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* not a match: retry starting one word after the rejected left edge */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+/*
+ * prsd_headline: choose which words of a parsed document form the headline.
+ *
+ * Arguments:
+ *   0 - HLPRSTEXT* with the parsed words; updated in place and returned
+ *   1 - text* option string, or NULL to use the defaults
+ *   2 - QUERYTYPE* query whose operands are to be highlighted
+ *
+ * Recognised options (matched case-insensitively): MaxWords, MinWords,
+ * ShortWord, StartSel, StopSel.  Defaults are MaxWords=35, MinWords=15,
+ * ShortWord=3, StartSel=<b>, StopSel=</b>.  An error is raised unless
+ * 0 < MinWords < MaxWords and ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is stretched or shrunk towards the
+ * MinWords..MaxWords range, and the best candidate is kept.  If there is no
+ * cover at all, the headline is the start of the document up to MinWords
+ * words.  Words of the chosen range get their in/selected/skip/replace
+ * flags set.
+ */
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* from opt + start and end tag */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       /* the map ends at the first entry with a NULL key */
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           /* i is one past the cover here, so restart from its last word */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this candidate if it is the first one, if it holds more
+        * query words and ends on an acceptable word, or if it ends on an
+        * acceptable word while the current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: fall back to the first min_words words of the text */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* NOTE(review): with curwords==0 this loop still touches words[0] - confirm callers never pass an empty text */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* default highlight tags, as documented for the headline() options */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom}}}}}}}
+>walls₁♦_{12
+>extend₁♦_{12
+>upward₁♦_{12
+>for₁♦_{12
+>well₁♦_{12
+>over₁♦_{12
+>100₂₂♦_{12
+>feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')

+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and

+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+}

diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html

new file mode 100644 (file)

index 0000000..df0faa4


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-ref.html
@@ -0,0 +1,448 @@
+
+
+
+
+tsearch2 reference
+
+
+The tsearch2 Reference
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Reference documents the user types and functions
+of the tsearch2 module for PostgreSQL.
+An introduction to the module is provided
+by the tsearch2 Guide,
+a companion document to this one.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+Vectors and Queries
+
+Vectors and queries both store lexemes,
+but for different purposes.
+A tsvector stores the lexemes
+of the words that are parsed out of a document,
+and can also remember the position of each word.
+A tsquery specifies a boolean condition among lexemes.
+
+Any of the following functions with a configuration argument
+can use either an integer id or textual ts_name
+to select a configuration;
+if the option is omitted, then the current configuration is used.
+For more information on the current configuration,
+read the next section on Configurations.
+
+Vector Operations
+
+
+
+ to_tsvector( [configuration,]

+ document TEXT) RETURNS tsvector
+
+ Parses a document into tokens,
+ reduces the tokens to lexemes,
+ and returns a tsvector which lists the lexemes
+ together with their positions in the document.
+ For the best description of this process,
+ see the section on Parsing and Stemming
+ in the accompanying tsearch2 Guide.
+
+ strip(vector tsvector) RETURNS tsvector
+
+ Return a vector which lists the same lexemes
+ as the given vector,
+ but which lacks any information
+ about where in the document each lexeme appeared.
+ While the returned vector is thus useless for relevance ranking,
+ it will usually be much smaller.
+
+ setweight(vector tsvector, letter) RETURNS tsvector
+
+ This function returns a copy of the input vector
+ in which every location has been labelled
+ with either the letter
+ 'A', 'B', or 'C',
+ or the default label 'D'
+ (which is the default with which new vectors are created,
+ and as such is usually not displayed).
+ These labels are retained when vectors are concatenated,
+ allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+
+ vector1 || vector2
+
+ concat(vector1 tsvector, vector2 tsvector)

+ RETURNS tsvector
+
+ Returns a vector which combines the lexemes and position information
+ in the two vectors given as arguments.
+ Position weight labels (described in the previous paragraph)
+ are retained intact during the concatenation.
+ This has at least two uses.
+ First,
+ if some sections of your document
+ need be parsed with different configurations than others,
+ you can parse them separately
+ and concatenate the resulting vectors into one.
+ Second,
+ you can weight words from some sections of your document
+ more heavily than those from others by:
+ parsing the sections into separate vectors;
+ assigning the vectors different position labels
+ with the setweight() function;
+ concatenating them into a single vector;
+ and then providing a weights argument
+ to the rank() function
+ that assigns different weights to positions with different labels.
+
+ tsvector_size(vector tsvector) RETURNS INT4
+
+ Returns the number of lexemes stored in the vector.
+
+ text::tsvector RETURNS tsvector
+
+ Directly casting text to a tsvector
+ allows you to directly inject lexemes into a vector,
+ with whatever positions and position weights you choose to specify.
+ The text should be formatted
+ like the vector would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Query Operations
+
+
+
+ to_tsquery( [configuration,]

+ querytext text) RETURNS tsquery
+
+ Parses a query,
+ which should be single words separated by the boolean operators
+ “&” and,
+ “|” or,
+ and “!” not,
+ which can be grouped using parentheses.
+ Each word is reduced to a lexeme using the current
+ or specified configuration.
+
+
+ querytree(query tsquery) RETURNS text
+
+ This might return a textual representation of the given query.
+
+ text::tsquery RETURNS tsquery
+
+ Directly casting text to a tsquery
+ allows you to directly inject lexemes into a query,
+ with whatever positions and position weight flags you choose to specify.
+ The text should be formatted
+ like the query would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Configurations
+
+A configuration specifies all of the equipment necessary
+to transform a document into a tsvector:
+the parser that breaks its text into tokens,
+and the dictionaries which then transform each token into a lexeme.
+Every call to to_tsvector() (described above)
+uses a configuration to perform its processing.
+Three configurations come with tsearch2:
+
+
+default — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the simple dictionary for all others.
+default_russian — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the ru_stem Russian Snowball dictionary for all others.
+simple — Processes both words and numbers
+ with the simple dictionary,
+ which neither discards any stop words nor alters them.
+
+
+The tsearch2 module initially chooses your current configuration
+by looking for your current locale in the locale field
+of the pg_ts_cfg table described below.
+You can manipulate the current configuration yourself with these functions:
+
+
+
+ set_curcfg( id INT | ts_name TEXT

+  ) RETURNS VOID
+
+ Set the current configuration used by to_tsvector
+ and to_tsquery.
+
+ show_curcfg() RETURNS INT4
+
+ Returns the integer id of the current configuration.
+
+
+
+Each configuration is defined by a record in the pg_ts_cfg table:
+
+create table pg_ts_cfg (
+   id      int not  null primary key,
+   ts_name     text not null,
+   prs_name    text not null,
+   locale      text
+);
+
+The id and ts_name are unique values
+which identify the configuration;
+the prs_name specifies which parser the configuration uses.
+Once this parser has split document text into tokens,
+the type of each resulting token —
+or, more specifically, the type's lex_alias
+as specified in the parser's lexem_type() table —
+is searched for together with the configuration's ts_name
+in the pg_ts_cfgmap table:
+
+create table pg_ts_cfgmap (
+   ts_name     text not null,
+   lex_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,lex_alias)
+);
+
+Those tokens whose types are not listed are discarded.
+The remaining tokens are assigned integer positions,
+starting with 1 for the first token in the document,
+and turned into lexemes with the help of the dictionaries
+whose names are given in the dict_name array for their type.
+These dictionaries are tried in order,
+stopping either with the first one to return a lexeme for the token,
+or discarding the token if no dictionary returns a lexeme for it.
+
+Parsers
+
+Each parser is defined by a record in the pg_ts_parser table:
+
+create table pg_ts_parser (
+   prs_id      int not null primary key,
+   prs_name    text not null,
+   prs_start   oid not null,
+   prs_getlexem    oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+);
+
+The prs_id and prs_name uniquely identify the parser,
+while prs_comment usually describes its name and version
+for the reference of users.
+The other items identify the low-level functions
+which make the parser operate,
+and are only of interest to someone writing a parser of their own.
+
+The tsearch2 module comes with one parser named default
+which is suitable for parsing most plain text and HTML documents.
+
+Each parser argument below
+must designate a parser with either an integer prs_id
+or a textual prs_name;
+the current parser is used when this argument is omitted.
+
+
+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID
+
+ Selects a current parser
+ which will be used when any of the following functions
+ are called without a parser as an argument.
+
+ CREATE FUNCTION lexem_type(

+  [ parser ]
+  ) RETURNS SETOF lexemtype
+
+ Returns a table which defines and describes
+ each kind of token the parser may produce as output.
+ For each token type the table gives the lexid
+ which the parser will label each token of that type,
+ the alias which names the token type,
+ and a short description descr for the user to read.
+
+ CREATE FUNCTION parse(

+  [ parser, ] document TEXT
+  ) RETURNS SETOF lexemtype
+
+ Parses the given document and returns a series of records,
+ one for each token produced by parsing.
+ Each token includes a lexid giving its type
+ and a lexem which gives its content.
+
+
+Dictionaries
+
+Dictionaries take textual tokens as input,
+usually those produced by a parser,
+and return lexemes which are usually some reduced form of the token.
+Among the dictionaries which come installed with tsearch2 are:
+
+
+simple simply folds uppercase letters to lowercase
+ before returning the word.
+en_stem runs an English Snowball stemmer on each word
+ that attempts to reduce the various forms of a verb or noun
+ to a single recognizable form.
+ru_stem runs a Russian Snowball stemmer on each word.
+
+
+Each dictionary is defined by an entry in the pg_ts_dict table:
+
+CREATE TABLE pg_ts_dict (
+   dict_id     int not null primary key,
+   dict_name   text not null,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lemmatize  oid not null,
+   dict_comment    text
+);
+
+The dict_id and dict_name
+serve as unique identifiers for the dictionary.
+The meaning of the dict_initoption varies among dictionaries,
+but for the built-in Snowball dictionaries
+it specifies a file from which stop words should be read.
+The dict_comment is a human-readable description of the dictionary.
+The other fields are internal function identifiers
+useful only to developers trying to implement their own dictionaries.
+
+The argument named dictionary
+in each of the following functions
+should be either an integer dict_id or a textual dict_name
+identifying which dictionary should be used for the operation;
+if omitted then the current dictionary is used.
+
+
+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID
+
+ Selects a current dictionary for use by functions
+ that do not select a dictionary explicitly.
+
+ CREATE FUNCTION lexize(

+ [ dictionary, ] word text)
+ RETURNS TEXT[]
+
+ Reduces a single word to a lexeme.
+ Note that lexemes are arrays of zero or more strings,
+ since in some languages there might be several base words
+ from which an inflected form could arise.
+
+
+Ranking
+
+Ranking attempts to measure how relevant documents are to particular queries
+by inspecting the number of times each search word appears in the document,
+and whether different search terms occur near each other.
+Note that this information is only available in unstripped vectors —
+ranking functions will only return a useful result
+for a tsvector which still has position information!
+
+Both of these ranking functions
+take an integer normalization option
+that specifies whether a document's length should impact its rank.
+This is often desirable,
+since a hundred-word document with five instances of a search word
+is probably more relevant than a thousand-word document with five instances.
+The option can have the values:
+
+
+0 (the default) ignores document length.
+1 divides the rank by the logarithm of the length.
+2 divides the rank by the length itself.
+
+
+The two ranking functions currently available are:
+
+
+
+ CREATE FUNCTION rank(

+  [ weights float4[], ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS,
+ and offers the ability to weight word instances more heavily
+ depending on how you have classified them.
+ The weights specify how heavily to weight each category of word:
+ 
+>{D-weight, A-weight, B-weight, C-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(

+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “
+>Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or less.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+


diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b


--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | [email protected]
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | [email protected]
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 |  
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 |  
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 |  
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 |  
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+


diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496


--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+# Makefile template for a generated tsearch2 dictionary module.
+# config.sh runs this file through sed: the CFG_* placeholders below are
+# substituted and the PG_CPPFLAGS line is rewritten with the include paths.
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+# Left empty on purpose: config.sh replaces this whole line.
+PG_CPPFLAGS =
+# Link against the library archive found in the sibling tsearch2 directory.
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+
+include $(top_srcdir)/contrib/contrib-global.mk


diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7


--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility helps people create dictionaries for the contrib/tsearch2
+module. In particular, it has built-in support for Snowball stemmers.
+
+Programming API to tsearch2 dictionaries is described in tsearch v2 
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument of the -p option must be *the same* as the name of
+      the stemming function in stem.c (without the _stem suffix).
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample portuguese words with the stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+   Note that the dictionary and the corresponding entry in the tsearch
+   configuration are now installed; you may modify them using plain SQL
+   commands, for example to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercase input word and remove it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please, check Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for additional information about "Gendict tutorial" and dictionaries.
\ No newline at end of file


diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542


--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRL/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+# Option state, filled in by the getopts loop below:
+#   dictname (-n)  stemmode (-s)  verbose (-v)  cfile (-c)  hfile (-h)
+#   dir (-d)       hasinit (-i)   comment (-C)  prefix (-p)
+dictname=
+stemmode=no
+verbose=no
+cfile=
+hfile=
+dir= 
+hasinit=no
+comment=
+prefix=
+
+# n, c, C, h, d and p take an argument; v, i and s are plain flags.
+# An unrecognized option prints the usage text and exits.
+while getopts n:c:C:h:d:p:vis opt
+do
+   case "$opt" in
+       v) verbose=yes;;
+       s) stemmode=yes;;
+       i) hasinit=yes;;
+       n) dictname="$OPTARG";;
+       c) cfile="$OPTARG";;
+       h) hfile="$OPTARG";;
+       d) dir="$OPTARG";;
+       C) comment="$OPTARG";;
+       p) prefix="$OPTARG";;
+       \?) usage;;
+   esac
+done
+
+[ ${#dictname} -eq 0 ] && usage
+
+dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'`
+
+if [ $stemmode = "yes" ] ; then 
+   [ ${#prefix} -eq 0 ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi 
+
+[ ${#dir}   -eq 0 ] && dir="dict_$dictname"
+
+if [ ${#comment} -eq 0 ]; then
+   comment=null
+else
+   comment="'$comment'"
+fi
+
+ofile=
+for f in $cfile
+do
+   f=` echo $f | sed 's#c$#o#'`
+   ofile="$ofile $f"
+done
+
+if [ $stemmode = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+if [ $verbose = "yes" ]; then
+   echo Dictname: "'"$dictname"'"
+   echo Snowball stemmer: $stemmode
+   echo Has init method: $hasinit
+   [ $stemmode = "yes" ] && echo Function prefix: $prefix 
+   echo Source files: $cfile
+   echo Header files: $hfile
+   echo Object files: $ofile
+   echo Comment: $comment
+   echo Directory: ../../$dir
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build directory...  '
+if [ ! -d ../../$dir ]; then
+   if ! mkdir ../../$dir ; then 
+       echo "Can't create directory ../../$dir"
+       exit 1
+   fi 
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+else
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+fi
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+   [ $verbose = "yes" ] && echo -n 'Copy source and header files...  '
+   if [ ${#cfile} -ne 0 ] ; then
+       if ! cp $cfile ../../$dir ; then 
+           echo "Cant cp all or one of files: $cfile"
+           exit 1
+       fi
+   fi
+   if [ ${#hfile} -ne 0 ] ; then 
+       if ! cp $hfile ../../$dir ; then 
+               echo "Cant cp all or one of files: $hfile"
+           exit 1
+       fi
+   fi
+   [ $verbose = "yes" ] && echo ok
+fi
+
+
+# Generate subinclude.h with one #include line per header file.
+[ $verbose = "yes" ] && echo -n 'Build sub-include header...  '
+# Create or truncate the file with the null command.  "echo -n" is not
+# portable: a shell whose echo does not know -n would write a literal "-n"
+# line into the header and break compilation of the generated module.
+: > ../../$dir/subinclude.h
+for i in $hfile
+do
+   echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+if  [ $stemmode = "yes" ] ; then 
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   [ $verbose = "yes" ] && echo -n 'Build dictinonary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else 
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi 
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n "Build README.$dictname...  "
+if  [ $stemmode = "yes" ] ; then
+   echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname
+else
+   echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname
+fi
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+


diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/* Per-dictionary state built by the init function below. */
+typedef struct {
+   /* stemmer environment obtained from CFG_PREFIX_create_env() */
+   struct SN_env *z;
+   /* stop words, loaded when the init function receives an argument */
+   StopList    stoplist;
+   /* stemming function, set to CFG_PREFIX_stem */
+   int (*stem)(struct SN_env * z);
+} DictSnowball;
+
+
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * Init method of the dictionary.
+ *
+ * Allocates a zeroed DictSnowball with malloc(), loads and sorts the stop
+ * word list when a non-NULL first argument is passed, creates the Snowball
+ * environment and records the stemming function.  Returns the state as a
+ * pointer Datum; raises ERROR "No memory" when either allocation fails.
+ */
+Datum 
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+       
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       freestoplist(&(d->stoplist));
+       /* d came from malloc(), so it must be released by hand before the error exit */
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+


diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+HASINIT /* Private state of the example dictionary: just a stop-word list. */
+HASINIT typedef struct {
+HASINIT    StopList    stoplist;
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT /*
+HASINIT  * dinit_CFG_MODNAME
+HASINIT  *     Allocate a zeroed DictExample with malloc() and return it as a
+HASINIT  *     pointer Datum.  A non-null argument 0 is handed to readstoplist()
+HASINIT  *     and the list is then sorted.
+HASINIT  */
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *dict;
+
+HASINIT    dict = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT    if ( dict == NULL )
+HASINIT        elog(ERROR, "No memory");
+
+HASINIT    memset(dict, 0, sizeof(DictExample));
+HASINIT    dict->stoplist.wordop = lowerstr;
+
+HASINIT    /* Your INIT code */
+
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+HASINIT        text       *arg = PG_GETARG_TEXT_P(0);
+
+HASINIT        readstoplist(arg, &(dict->stoplist));
+HASINIT        sortstoplist(&(dict->stoplist));
+HASINIT        PG_FREE_IF_COPY(arg, 0);
+HASINIT    }
+
+HASINIT    PG_RETURN_POINTER(dict);
+HASINIT }
+
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * dlexize_CFG_MODNAME
+ *     Argument 1 points at the word and argument 2 is its length; the word
+ *     is copied with pnstrdup().  The result is a palloc'ed array of two
+ *     char pointers whose second slot is always NULL.  The first slot holds
+ *     the copy, except that in the HASINIT variant an empty word or a word
+ *     found by searchstoplist() frees the copy and leaves NULL there.
+ */
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *word = (char*)PG_GETARG_POINTER(1);
+   char       *txt = pnstrdup(word, PG_GETARG_INT32(2));
+   char      **res = palloc(sizeof(char*)*2);
+
+   /* Your INIT dictionary code */
+
+   /* default answer: the word itself */
+   res[0] = txt;
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0] = NULL;
+HASINIT    }
+   res[1] = NULL;
+
+   PG_RETURN_POINTER(res);
+}


diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842


--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+-- Registration script template for a generated dictionary.
+-- NOTE(review): the HASINIT / NOINIT / ISSNOWBALL / NOSNOWBALL line prefixes
+-- and the CFG_* words look like placeholders resolved by the generator
+-- script before this file is run -- confirm against that script.
+SET search_path = public;
+BEGIN;
+
+-- Optional init function; takes one text argument.
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+-- Dictionary-specific lexize function; emitted only for non-Snowball
+-- dictionaries (Snowball ones register snb_lexize below instead).
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+-- Register the dictionary: name, init function oid (or null), a null
+-- column, lexize function oid, comment.
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;


diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+/*
+ * Version-1 call convention declarations for every SQL-callable function
+ * defined in this file: the type's in/out stubs followed by the GiST
+ * support routines (compress, decompress, consistent, union, same,
+ * penalty, picksplit).
+ */
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/*
+ * GETENTRY: treat the data area of varlena "vec" as an array of GISTENTRY
+ * and return the key of element "pos" as a GISTTYPE pointer.
+ */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/*
+ * SUMBIT: number of set bits in the byte "val" (sum of its eight
+ * GETBITBYTE bits).  "val" is expanded eight times, so it must not have
+ * side effects.
+ */
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/*
+ * gtsvector_in
+ *     Stub: always raises the error "Not implemented".  The return
+ *     statement after elog(ERROR) only gives the function a return value.
+ */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * gtsvector_out
+ *     Stub: always raises the error "Not implemented".  The return
+ *     statement after elog(ERROR) only gives the function a return value.
+ */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * qsort() comparator for int4 values: ascending order, returns -1, 0 or 1.
+ */
+static int
+compareint(const void *a, const void *b)
+{
+   int4        lhs = *((int4 *) a);
+   int4        rhs = *((int4 *) b);
+
+   if (lhs < rhs)
+       return -1;
+   return (lhs > rhs) ? 1 : 0;
+}
+
+/*
+ * uniqueint
+ *     Sort the l elements of a[] in ascending order, squeeze out duplicates
+ *     in place and return how many distinct values are left at the front.
+ *
+ * Arrays with fewer than two elements are returned untouched.  The guard
+ * must cover l == 0 as well as l == 1: the compaction loop below always
+ * reports at least one element ("res + 1 - a"), which would turn an empty
+ * array into a one-element array holding whatever a[0] happens to contain.
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+   int4       *ptr,
+              *res;
+
+   if (l <= 1)
+       return l;
+
+   ptr = res = a;
+
+   qsort((void *) a, l, sizeof(int4), compareint);
+
+   /* res: last distinct value kept so far; ptr: scan position */
+   while (ptr - a < l)
+       if (*ptr != *res)
+           *(++res) = *ptr++;
+       else
+           ptr++;
+   return res + 1 - a;
+}
+
+/*
+ * makesign
+ *     Build a signature from an array key: clear "sign", then set the bit
+ *     selected by HASH() for every element of the array stored in "a".
+ */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+   int4       *elems = GETARR(a);
+   int4        nelems = ARRNELEM(a);
+   int4        idx;
+
+   MemSet((void *) sign, 0, sizeof(BITVEC));
+
+   /* HASH() expands its value argument more than once: keep it side-effect free */
+   for (idx = 0; idx < nelems; idx++)
+       HASH(sign, elems[idx]);
+}
+
+/*
+ * gtsvector_compress
+ *     Convert the key in a GISTENTRY (argument 0) into its index form.
+ *
+ * - Leaf key (a tsvector): compute crc32_sz() of every word, sort the
+ *   values and drop duplicates (uniqueint), giving an ARRKEY.  If that key
+ *   is longer than TOAST_INDEX_TARGET it is replaced by a SIGNKEY
+ *   signature built with makesign().
+ * - Non-leaf signature key that is not yet marked ALLISTRUE: if every
+ *   signature byte is 0xff, replace it by a header-only
+ *   SIGNKEY | ALLISTRUE key.
+ * - Anything else: the entry is returned unchanged.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       /* room for one hash per word; may shrink after deduplication */
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* free the detoasted copy, if PG_DETOAST_DATUM made one */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /* any byte that is not all ones: keep the entry as it is */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       /* all bits are set: store the compact ALLISTRUE form instead */
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * gtsvector_decompress
+ *     Detoast the key of the entry in argument 0.  If detoasting produced a
+ *     new copy, return a freshly palloc'ed GISTENTRY that points at it;
+ *     otherwise return the original entry.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+   GISTENTRY  *retval;
+
+   /* nothing was detoasted: hand the entry back as is */
+   if (key == (GISTTYPE *) DatumGetPointer(entry->key))
+       PG_RETURN_POINTER(entry);
+
+   retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+   gistentryinit(*retval, PointerGetDatum(key),
+                 entry->rel, entry->page,
+                 entry->offset, key->len, FALSE);
+
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * Half-open range [arrb, arre) over the int4 array of an ARRKEY key;
+ * passed to checkcondition_arr() as its opaque "checkval".
+ */
+typedef struct
+{
+   int4       *arrb;
+   int4       *arre;
+}  CHKVAL;
+
+/*
+ * checkcondition_arr
+ *     Binary search for val->val in the ascending array described by the
+ *     CHKVAL passed as "checkval".  Returns true when the value is present.
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+   CHKVAL     *range = (CHKVAL *) checkval;
+   int4       *lo = range->arrb;
+   int4       *hi = range->arre;
+
+   /* the search window is always [lo, hi) */
+   while (lo < hi)
+   {
+       int4       *mid = lo + (hi - lo) / 2;
+
+       if (*mid < val->val)
+           lo = mid + 1;
+       else if (*mid == val->val)
+           return (true);
+       else
+           hi = mid;
+   }
+
+   return (false);
+}
+
+/*
+ * checkcondition_bit
+ *     Signature counterpart of checkcondition_arr(): "checkval" is a
+ *     signature and the result is the bit at position HASHVAL(val->val).
+ *     A set bit means the value may be present; a clear bit means no
+ *     value hashing to that position was added.
+ */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+   return GETBIT(checkval, HASHVAL(val->val));
+}
+
+/*
+ * gtsvector_consistent
+ *     Test the query in argument 1 against the key of the GISTENTRY in
+ *     argument 0.
+ *
+ * An empty query never matches.  Array keys are tested with
+ * checkcondition_arr, signature keys with checkcondition_bit, and an
+ * ALLISTRUE signature matches everything.  The third TS_execute() argument
+ * is true for arrays and false for signatures (presumably it controls
+ * whether NOT can be evaluated -- confirm in TS_execute).
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(entry->key);
+   CHKVAL      chkval;
+
+   if (!query->size)
+       PG_RETURN_BOOL(false);
+
+   if (!ISSIGNKEY(key))
+   {                           /* only leaf pages */
+       chkval.arrb = GETARR(key);
+       chkval.arre = chkval.arrb + ARRNELEM(key);
+       PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                                 (void *) &chkval, true,
+                                 checkcondition_arr));
+   }
+
+   if (ISALLTRUE(key))
+       PG_RETURN_BOOL(true);
+
+   PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                             (void *) GETSIGN(key), false,
+                             checkcondition_bit));
+}
+
+/*
+ * unionkey
+ *     OR the key "add" into the signature "sbase".  Array keys are hashed
+ *     element by element, signature keys are OR-ed byte by byte.
+ *
+ * Returns 1 (leaving sbase untouched) when "add" is an ALLISTRUE key,
+ * otherwise 0.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+   int4        i;
+   BITVECP     sadd;
+
+   if (!ISSIGNKEY(add))
+   {
+       int4       *ptr = GETARR(add);
+
+       for (i = 0; i < ARRNELEM(add); i++)
+           HASH(sbase, ptr[i]);
+       return 0;
+   }
+
+   if (ISALLTRUE(add))
+       return 1;
+
+   sadd = GETSIGN(add);
+   LOOPBYTE(
+       sbase[i] |= sadd[i];
+   );
+   return 0;
+}
+
+
+/*
+ * gtsvector_union
+ *     Build the union key of all entries in the vector passed as
+ *     argument 0 and store its size through argument 1.
+ *
+ * The result is always a signature key.  As soon as one input is
+ * ALLISTRUE the result is a header-only SIGNKEY | ALLISTRUE key.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   int        *size = (int *) PG_GETARG_POINTER(1);
+   int4        nentries = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+   int4        flag = SIGNKEY;
+   int4        len;
+   int4        n;
+   BITVEC      base;
+   GISTTYPE   *result;
+
+   MemSet((void *) base, 0, sizeof(BITVEC));
+   for (n = 0; n < nentries; n++)
+       if (unionkey(base, GETENTRY(entryvec, n)))
+       {
+           flag |= ALLISTRUE;
+           break;
+       }
+
+   len = CALCGTSIZE(flag, 0);
+   result = (GISTTYPE *) palloc(len);
+   result->len = len;
+   result->flag = flag;
+   *size = len;
+
+   /* an ALLISTRUE key has no signature bytes to fill */
+   if (!ISALLTRUE(result))
+       memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * gtsvector_same
+ *     Store in *result (argument 2) whether keys a and b are equal, and
+ *     return that pointer.  The kind of comparison is chosen from a alone:
+ *     b is taken to be of the same kind (signature or array).
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+   GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+   GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+   bool       *result = (bool *) PG_GETARG_POINTER(2);
+   int4        i;
+
+   if (ISSIGNKEY(a))
+   {                           /* then b also ISSIGNKEY */
+       bool        a_all = ISALLTRUE(a) ? true : false;
+       bool        b_all = ISALLTRUE(b) ? true : false;
+
+       if (a_all || b_all)
+       {
+           /* equal only when both are ALLISTRUE */
+           *result = (a_all && b_all) ? true : false;
+       }
+       else
+       {
+           BITVECP     sa = GETSIGN(a);
+           BITVECP     sb = GETSIGN(b);
+
+           *result = true;
+           LOOPBYTE(
+               if (sa[i] != sb[i])
+               {
+                   *result = false;
+                   break;
+               }
+           );
+       }
+   }
+   else
+   {                           /* a and b ISARRKEY */
+       int4        lena = ARRNELEM(a);
+       int4        lenb = ARRNELEM(b);
+
+       *result = (lena == lenb) ? true : false;
+       if (*result)
+       {
+           int4       *ptra = GETARR(a);
+           int4       *ptrb = GETARR(b);
+
+           for (i = 0; i < lena; i++)
+               if (ptra[i] != ptrb[i])
+               {
+                   *result = false;
+                   break;
+               }
+       }
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * sizebitvec
+ *     Count the bits that are set in a signature.
+ */
+static int4
+sizebitvec(BITVECP sign)
+{
+   int4        nbits = 0;
+   int4        i;
+
+   LOOPBYTE(
+       nbits += SUMBIT(sign[i]);
+   );
+   return nbits;
+}
+
+/*
+ * hemdistsign
+ *     Hamming distance of two signatures: the number of bit positions in
+ *     which a and b differ.
+ */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+   int     i;
+   int     ndiff = 0;
+
+   /* GETBIT yields 0 or 1, so the xor is 1 exactly for differing bits */
+   LOOPBIT(
+       ndiff += (GETBIT(a,i) ^ GETBIT(b,i));
+   );
+   return ndiff;
+}
+
+/*
+ * hemdist
+ *     Hamming distance of two signature keys.  An ALLISTRUE key counts as
+ *     a signature with every bit set, so its distance to an ordinary key
+ *     is the number of clear bits in that key.
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+   if ( ISALLTRUE(a) && ISALLTRUE(b) )
+       return 0;
+   if ( ISALLTRUE(a) )
+       return SIGLENBIT-sizebitvec(GETSIGN(b));
+   if ( ISALLTRUE(b) )
+       return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+   return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * gtsvector_penalty
+ *     Store in *penalty (argument 2) the cost of adding the key of
+ *     newentry (argument 1) under origentry (argument 0), and return that
+ *     pointer.
+ *
+ * The cost is the Hamming distance between the two keys.  An array key is
+ * first turned into a signature; against an ALLISTRUE original the
+ * distance is scaled by 1/(SIGLENBIT+1).
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+   GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+   float      *penalty = (float *) PG_GETARG_POINTER(2);
+   GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+   GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+
+   *penalty = 0.0;
+
+   if (!ISARRKEY(newval))
+   {
+       *penalty = hemdist(origval, newval);
+       PG_RETURN_POINTER(penalty);
+   }
+
+   {
+       BITVEC      sign;
+
+       makesign(sign, newval);
+       if (ISALLTRUE(origval))
+           *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
+       else
+           *penalty = hemdistsign(sign, GETSIGN(origval));
+   }
+   PG_RETURN_POINTER(penalty);
+}
+
+/*
+ * Signature form of one entry, cached by gtsvector_picksplit().  "sign" is
+ * a bare signature (no GISTTYPE header in front of it) and is filled only
+ * when allistrue is false.
+ */
+typedef struct
+{
+   bool        allistrue;
+   BITVEC      sign;
+}  CACHESIGN;
+
+/*
+ * fillcache
+ *     Fill "item" from "key": array keys are hashed into a signature,
+ *     ALLISTRUE keys only set the flag, ordinary signatures are copied.
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+   item->allistrue = false;
+
+   if (ISARRKEY(key))
+   {
+       makesign(item->sign, key);
+       return;
+   }
+   if (ISALLTRUE(key))
+   {
+       item->allistrue = true;
+       return;
+   }
+   memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+}
+
+/*
+ * WISH_F(a,b,c) = -((a-b)^3) * c, as a double.  gtsvector_picksplit() adds
+ * it to the right-hand distance with a = entries already on the left and
+ * b = entries already on the right, which biases the choice towards the
+ * smaller side.
+ */
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+/* One entry of the split order: its offset and its sorting cost. */
+typedef struct
+{
+   OffsetNumber pos;
+   int4        cost;
+} SPLITCOST;
+
+/*
+ * qsort() comparator for SPLITCOST: ascending by cost, returns -1, 0 or 1.
+ */
+static int
+comparecost(const void *a, const void *b)
+{
+   int4        lhs = ((SPLITCOST *) a)->cost;
+   int4        rhs = ((SPLITCOST *) b)->cost;
+
+   if (lhs < rhs)
+       return -1;
+   return (lhs > rhs) ? 1 : 0;
+}
+
+
+/*
+ * hemdistcache
+ *     Hamming distance of two cached signatures; an allistrue item counts
+ *     as a signature with every bit set.
+ */
+static int
+hemdistcache(CACHESIGN   *a, CACHESIGN   *b) {
+   if ( a->allistrue && b->allistrue )
+       return 0;
+   if ( a->allistrue )
+       return SIGLENBIT-sizebitvec(b->sign);
+   if ( b->allistrue )
+       return SIGLENBIT-sizebitvec(a->sign);
+
+   return hemdistsign( a->sign, b->sign );
+}
+
+/*
+ * gtsvector_picksplit
+ *     Distribute the entries of the vector in argument 0 over the two
+ *     sides of the GIST_SPLITVEC in argument 1 and return that struct.
+ *
+ * Steps:
+ *  1. Cache a signature for every entry and pick as seeds the pair with
+ *     the largest Hamming distance among offsets FirstOffsetNumber..maxoff
+ *     (the entry at maxoff+1 is cached later and is never a seed).
+ *  2. Start the left/right union keys from the two seeds.
+ *  3. Order all entries by |distance to seed_1 - distance to seed_2|.
+ *  4. Walk that order and put each entry on the side whose current union
+ *     is closer, with WISH_F() biasing towards the smaller side, OR-ing
+ *     the entry's signature into the chosen union.
+ *
+ * cache[j].sign is a bare BITVEC, so it is passed to sizebitvec() as is;
+ * GETSIGN() is only for GISTTYPE keys (it skips the key header) and
+ * applying it to the cache would read GTHDRSIZE bytes past the signature.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+   OffsetNumber k,
+               j;
+   GISTTYPE   *datum_l,
+              *datum_r;
+   BITVECP     union_l,
+               union_r;
+   int4        size_alpha,
+               size_beta;
+   int4        size_waste,
+               waste = -1;
+   int4        nbytes;
+   OffsetNumber seed_1 = 0,
+               seed_2 = 0;
+   OffsetNumber *left,
+              *right;
+   OffsetNumber maxoff;
+   BITVECP     ptr;
+   int         i;
+   CACHESIGN  *cache;
+   SPLITCOST  *costvector;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+   cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+   fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+   /* step 1: the most distant pair becomes the seeds */
+   for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+       for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+           /* the cache is filled during the first pass of the outer loop */
+           if (k == FirstOffsetNumber)
+               fillcache(&cache[j], GETENTRY(entryvec, j));
+
+           size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+           if (size_waste > waste) {
+               waste = size_waste;
+               seed_1 = k;
+               seed_2 = j;
+           }
+       }
+   }
+
+   left = v->spl_left;
+   v->spl_nleft = 0;
+   right = v->spl_right;
+   v->spl_nright = 0;
+
+   /* no pair was examined: fall back to the first two offsets */
+   if (seed_1 == 0 || seed_2 == 0) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
+   /* form initial .. */
+   if (cache[seed_1].allistrue) {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_l->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_l->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+   }
+   if (cache[seed_2].allistrue) {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_r->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_r->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+   }
+
+   /* only dereferenced while the corresponding datum is not ALLISTRUE */
+   union_l=GETSIGN(datum_l);
+   union_r=GETSIGN(datum_r);
+   maxoff = OffsetNumberNext(maxoff);
+   fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+   /* sort before ... */
+   costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+   for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+       costvector[j - 1].pos = j;
+       size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+       size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+       costvector[j - 1].cost = abs(size_alpha - size_beta);
+   }
+   qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+   for (k = 0; k < maxoff; k++) {
+       j = costvector[k].pos;
+       /* the seeds go to their own side; their bits are already in the unions */
+       if (j == seed_1) {
+           *left++ = j;
+           v->spl_nleft++;
+           continue;
+       } else if (j == seed_2) {
+           *right++ = j;
+           v->spl_nright++;
+           continue;
+       }
+
+       /* distance of entry j to the current left union */
+       if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+               size_alpha=0;
+           else
+               size_alpha = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign
+               );
+       } else {
+           size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+       }
+
+       /* distance of entry j to the current right union */
+       if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+               size_beta=0;
+           else
+               size_beta = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign
+               );
+       } else {
+           size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+       }
+
+       if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+           if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+               /* an allistrue entry fills an ordinary union with ones */
+               if (! ISALLTRUE(datum_l) )
+                   MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_l[i] |= ptr[i];
+               );
+           }
+           *left++ = j;
+           v->spl_nleft++;
+       } else {
+           if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+               /* an allistrue entry fills an ordinary union with ones */
+               if (! ISALLTRUE(datum_r) )
+                   MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_r[i] |= ptr[i];
+               );
+           }
+           *right++ = j;
+           v->spl_nright++;
+       }
+   }
+
+   /* one extra slot after the last offset on each side */
+   *right = *left = FirstOffsetNumber;
+   pfree(costvector);
+   pfree(cache);
+   v->spl_ldatum = PointerGetDatum(datum_l);
+   v->spl_rdatum = PointerGetDatum(datum_r);
+
+   PG_RETURN_POINTER(v);
+}


diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+#define BITBYTE 8
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+#define LOOPBYTE(a) \
+       for(i=0;i
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i
+                               a;\
+               }
+
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+#define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ */
+typedef struct
+{
+   int4        len;
+   int4        flag;
+   char        data[1];
+}  GISTTYPE;
+
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+#define ISARRKEY(x) ( ((GISTTYPE*)x)->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)x)->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)x)->flag & ALLISTRUE )
+
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+#define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)x+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)x)->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "spell.h"
+
+#define MAXNORMLEN 56
+
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/* qsort() comparator: order SPELL entries by word, in strcmp() order. */
+static int
+cmpspell(const void *s1, const void *s2)
+{
+   const SPELL *lhs = (const SPELL *) s1;
+   const SPELL *rhs = (const SPELL *) s2;
+
+   return strcmp(lhs->word, rhs->word);
+}
+
+/* Lower-case the NUL-terminated string "str" in place, byte by byte. */
+static void 
+strlower( char * str ) {
+   unsigned char *p;
+
+   for (p = (unsigned char *)str; *p; p++)
+       *p = tolower( *p );
+}
+
+/*
+ * Backward string compare for suffix tree operations: the strings are
+ * compared from their last character towards the first.  If one string
+ * runs out first, the shorter one sorts lower.
+ */
+static int 
+strbcmp(const char *s1, const char *s2) { 
+   int     i1 = strlen(s1) - 1;
+   int     i2 = strlen(s2) - 1;
+
+   for (; i1 >= 0 && i2 >= 0; i1--, i2--) {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+   if (i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+/*
+ * Like strbcmp(), but compares at most "count" trailing characters.
+ * Returns 0 as soon as "count" characters matched.
+ */
+static int 
+strbncmp(const char *s1, const char *s2, size_t count) { 
+   int     i1 = strlen(s1) - 1;
+   int     i2 = strlen(s2) - 1;
+   int     todo = count;
+
+   for (; i1 >= 0 && i2 >= 0 && todo > 0; i1--, i2--, todo--) {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+   if (todo == 0 || i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+
+/*
+ * qsort() comparator for AFFIX: first by type, then by the replacement
+ * string -- forward (strcmp) for type 'p', backward (strbcmp) otherwise.
+ */
+static int 
+cmpaffix(const void *s1,const void *s2){
+   const AFFIX *lhs = (const AFFIX*)s1;
+   const AFFIX *rhs = (const AFFIX*)s2;
+
+   if (lhs->type != rhs->type)
+       return (lhs->type < rhs->type) ? -1 : 1;
+
+   return (lhs->type == 'p')
+       ? strcmp(lhs->repl, rhs->repl)
+       : strbcmp(lhs->repl, rhs->repl);
+}
+
+/*
+ * AddSpell
+ *     Append "word" with its affix flags to Conf->Spell, growing the array
+ *     in steps of 1024*20 entries.  The word is strdup'ed.  Raises
+ *     elog(ERROR) when memory runs out; returns 0 otherwise.
+ *
+ * At most 10 bytes of "flag" are stored.  strncpy() does not terminate the
+ * destination when the source has 10 or more characters, and FindWord()
+ * runs strchr() on the stored flags, so the last copied byte is forced to
+ * NUL (flags longer than 9 characters are therefore cut to 9).
+ */
+int 
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell"); 
+   }
+   Conf->Spell[Conf->nspell].word=strdup(word);
+   if ( !Conf->Spell[Conf->nspell].word ) 
+       elog(ERROR,"No memory for AddSpell");
+   strncpy(Conf->Spell[Conf->nspell].flag,flag,10);
+   Conf->Spell[Conf->nspell].flag[9]='\0';
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * ImportDictionary
+ *     Read a dictionary file, one "word" or "word/flags" entry per line,
+ *     and add every line with AddSpell().  The flags are the run of ASCII
+ *     letters after the '/'; the word is lower-cased and cut at the first
+ *     CR or LF.  Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportDictionary(IspellDict * Conf,const char *filename){
+   unsigned char line[BUFSIZ];
+   FILE *fp;
+
+   fp = fopen(filename,"r");
+   if (!fp)
+       return(1);
+
+   while (fgets(line,sizeof(line),fp)) {
+       const unsigned char *flag = "";
+       unsigned char *p = strchr(line,'/');
+
+       if (p) {
+           /* split "word/flags" and end the flags at the first non-letter */
+           *p++ = 0;
+           flag = p;
+           while (*p && (((*p>='A')&&(*p<='Z'))||((*p>='a')&&(*p<='z'))))
+               p++;
+           *p = 0;
+       }
+
+       /* only the word part is lower-cased: the flags lie behind its NUL */
+       strlower(line);
+       for (p = line; *p; p++) {
+           if (*p=='\r' || *p=='\n')
+               *p = 0;
+       }
+       AddSpell(Conf,line,flag);
+   }
+   fclose(fp);
+   return(0);
+}
+
+
+/*
+ * FindWord
+ *     Look "word" up in the sorted Conf->Spell array, inside the index
+ *     range that SpellTree records for the word's first byte.  An entry
+ *     matches when its word is equal and, if affixflag is non-zero, its
+ *     flag string contains affixflag.  Returns the entry or NULL.
+ *
+ * Every round probes the middle and both ends of the current range.  The
+ * range is then halved according to the middle comparison, and the probed
+ * end(s) are dropped as well.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int     first = (int)(*word) & 255;
+   int     lo = Conf->SpellTree.Left[first];
+   int     hi = Conf->SpellTree.Right[first];
+
+   if (lo == -1)
+       return (NULL);
+
+   while (lo <= hi) {
+       int     mid = (lo + hi) >> 1;
+       int     probe[3];
+       int     cmpmid = 0;
+       int     n;
+
+       probe[0] = mid;
+       probe[1] = lo;
+       probe[2] = hi;
+       for (n = 0; n < 3; n++) {
+           SPELL  *cand = &Conf->Spell[probe[n]];
+           int     cmp = strcmp(cand->word, word);
+
+           if (n == 0)
+               cmpmid = cmp;
+           if (cmp == 0 &&
+               (affixflag == 0 || strchr(cand->flag, affixflag) != NULL))
+               return(cand);
+       }
+
+       if (cmpmid < 0) {
+           lo = mid + 1;
+           hi--;
+       } else if (cmpmid > 0) {
+           hi = mid - 1;
+           lo++;
+       } else {
+           /* same word but wrong flags: keep looking on both sides */
+           lo++;
+           hi--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * AddAffix
+ *     Append one affix rule to Conf->Affix, growing the array in steps of
+ *     16 entries.  The mask is stored as a regular expression anchored at
+ *     the end ("mask$") for type 's' and at the start ("^mask") otherwise;
+ *     it is compiled lazily (compile = 1).  Raises elog(ERROR) when memory
+ *     runs out or a field does not fit; returns 0 otherwise.
+ *
+ * mask/find/repl are written straight into the freshly allocated AFFIX
+ * entry, i.e. into fixed-size members, while ImportAffixes() passes
+ * strings of up to BUFSIZ bytes.  The lengths are therefore checked before
+ * copying instead of overrunning the entry.
+ */
+int 
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   AFFIX *affix;
+
+   if(Conf->naffixes>=Conf->maffixes){
+       if(Conf->maffixes){
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }else{
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL ) 
+           elog(ERROR,"No memory for AddAffix");
+   }
+   affix = &(((AFFIX*)Conf->Affix)[Conf->naffixes]);
+
+   /* mask needs room for the anchor character and the terminating NUL */
+   if ( strlen(mask) + 2 > sizeof(affix->mask) ||
+        strlen(find) + 1 > sizeof(affix->find) ||
+        strlen(repl) + 1 > sizeof(affix->repl) )
+       elog(ERROR,"Affix rule is too long");
+
+   if (type=='s') {
+       sprintf(affix->mask,"%s$",mask);
+   } else {
+       sprintf(affix->mask,"^%s",mask);
+   }
+   affix->compile = 1;
+   affix->flag=flag;
+   affix->type=type;
+
+   strcpy(affix->find,find);
+   strcpy(affix->repl,repl);
+   affix->replen=strlen(repl);
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy "src" to "dist", leaving out blanks, tabs and '-' characters.
+ * "dist" must have room for the result; it is returned.
+ */
+static char * 
+remove_spaces(char *dist,char *src){
+   char *out = dist;
+
+   for (; *src; src++) {
+       if (*src==' ' || *src=='-' || *src=='\t')
+           continue;
+       *out++ = *src;
+   }
+   *out = 0;
+   return(dist);
+}
+
+
+/*
+ * ImportAffixes
+ *     Read an affix file and add its rules with AddAffix().  Returns 1 if
+ *     the file cannot be opened, 0 otherwise.
+ *
+ * Recognised lines (keywords are matched case-insensitively at the start
+ * of the line):
+ *   "suffixes" / "prefixes"  - select the kind of the following rules
+ *   "flag X"                 - set the flag character of the following
+ *                              rules; blanks and '*' before X are skipped
+ *   "mask > find, repl"      - a rule; with only two fields the second
+ *                              one is the replacement and find is empty
+ * Text after '#' is ignored.  Rule lines are lower-cased and blanks, tabs
+ * and '-' are removed from every field.
+ */
+int 
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           /*
+            * strchr() also finds the terminating NUL of "* ", so the end
+            * of the line has to be tested first or the scan runs off the
+            * buffer.
+            */
+           while(*s && strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* str is free to be reused as scratch space from here on */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               /* "mask > repl": what was scanned as find is the replacement */
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+
+   }
+   fclose(affix);
+
+   return(0);
+}
+
+/*
+ * SortDictionary
+ *     Sort Conf->Spell by word and record in SpellTree, for every first
+ *     byte value, the first (Left) and last (Right) array index of the
+ *     words starting with it.  Left is -1 for bytes that start no word;
+ *     Right is not written for them.
+ */
+void 
+SortDictionary(IspellDict * Conf){
+   size_t  idx;
+   int     prev = -1;
+
+   qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+   for (idx = 0; idx < 256; idx++)
+       Conf->SpellTree.Left[idx] = -1;
+
+   for (idx = 0; idx < Conf->nspell; idx++) {
+       int first = (int)(*(Conf->Spell[idx].word)) & 255;
+
+       /* a new first byte opens a new range */
+       if (first != prev) {
+           Conf->SpellTree.Left[first] = idx;
+           prev = first;
+       }
+       Conf->SpellTree.Right[first] = idx;
+   }
+}
+
+/*
+ * SortAffixes
+ *     Sort Conf->Affix with cmpaffix and build the two lookup tables:
+ *     PrefixTree is keyed by the first byte of a prefix replacement,
+ *     SuffixTree by the last byte of a suffix replacement (0 when the
+ *     replacement is empty).  Left/Right hold the first/last array index
+ *     for a key, or -1 when no affix has that key.
+ */
+void 
+SortAffixes(IspellDict * Conf) {
+   AFFIX  *cur;
+   size_t  idx;
+   int     lastPrefix = -1;
+   int     lastSuffix = -1;
+   int     key;
+
+   if (Conf->naffixes > 1)
+       qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
+
+   for (idx = 0; idx < 256; idx++) {
+       Conf->PrefixTree.Left[idx] = Conf->PrefixTree.Right[idx] = -1;
+       Conf->SuffixTree.Left[idx] = Conf->SuffixTree.Right[idx] = -1;
+   }
+
+   for (idx = 0; idx < Conf->naffixes; idx++) {
+       cur = &(((AFFIX*)Conf->Affix)[idx]);
+       if (cur->type == 'p') {
+           key = (int)(*(cur->repl)) & 255;
+           if (lastPrefix != key) {
+               Conf->PrefixTree.Left[key] = idx;
+               lastPrefix = key;
+           }
+           Conf->PrefixTree.Right[key] = idx;
+       } else {
+           key = (cur->replen) ? (int)(cur->repl[cur->replen-1]) & 255 : 0;
+           if (lastSuffix != key) {
+               Conf->SuffixTree.Left[key] = idx;
+               lastSuffix = key;
+           }
+           Conf->SuffixTree.Right[key] = idx;
+       }
+   }
+}
+
+/*
+ * Try to undo one suffix rule on 'word'.
+ *
+ * *res receives the result of strbncmp(word, Affix->repl, Affix->replen)
+ * (strbncmp is defined elsewhere; presumably it compares word endings).
+ * When that is zero, the last 'replen' bytes of 'word' are replaced by
+ * Affix->find, the candidate is matched against the rule's mask regex
+ * (compiled on first use) and looked up with FindWord.  Returns a pstrdup'd
+ * copy of the candidate if it is a known word, otherwise NULL.
+ */
+static char * 
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) {
+  regmatch_t match[2]; /* workaround for apache&linux */
+  char candidate[2*MAXNORMLEN] = "";
+  int rc;
+
+  *res = strbncmp(word, Affix->repl, Affix->replen);
+  if (*res != 0)
+    return NULL;
+
+  /* swap the matched ending for the rule's 'find' text */
+  strcpy(candidate, word);
+  strcpy(candidate+len-Affix->replen, Affix->find);
+
+  /* the mask regex is compiled lazily and cached in the rule */
+  if (Affix->compile) {
+    rc = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if (rc != 0) {
+      regfree(&(Affix->reg));
+      return NULL;
+    }
+    Affix->compile = 0;
+  }
+
+  rc = regexec(&(Affix->reg),candidate,1,match,0);
+  if (rc == 0 && FindWord(Conf, candidate, Affix->flag))
+    return pstrdup(candidate);
+
+  return NULL;
+}
+
+/* NS scales the regmatch_t scratch array used by CheckPrefix */
+#define NS 1
+/* capacity, in pointers, of the result array NormalizeWord allocates */
+#define MAX_NORM 512
+/*
+ * Try to undo one prefix rule on 'word' and append any normal forms found
+ * to the caller's result array.
+ *
+ *  word   - word being normalised
+ *  len    - length of 'word' (not used in this function)
+ *  Affix  - prefix rule to try
+ *  Conf   - dictionary
+ *  pi     - byte used to pick a suffix rule from Conf->SuffixTree
+ *  forms  - start of the result array
+ *  cur    - address of the caller's write cursor into 'forms'; advanced
+ *           past each stored form, with a NULL kept at the cursor
+ *
+ * Returns the strncmp() result when 'word' does not start with
+ * Affix->repl, otherwise 0 (also when the mask regex fails to compile).
+ */
+static int 
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur ) {
+  regmatch_t subs[NS*2];
+  char newword[2*MAXNORMLEN] = "";
+  int err, ls, res, lres;
+  size_t newlen;
+  AFFIX *CAffix = Conf->Affix;
+  
+  res = strncmp(word, Affix->repl, Affix->replen);
+  if (res != 0) {
+    return res;
+  }
+  /* candidate = rule's 'find' text + word with the 'repl' prefix removed */
+  strcpy(newword, Affix->find);
+  strcat(newword, word+Affix->replen);
+
+  /* compile the mask regex on first use; compile==0 marks it as ready */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return (0);
+    }
+    Affix->compile = 0;
+  }
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    SPELL * curspell;
+
+    /* the de-prefixed word is itself a dictionary word */
+    if((curspell=FindWord(Conf, newword, Affix->flag))){
+      if ((*cur - forms) < (MAX_NORM-1)) {
+   **cur =  pstrdup(newword);
+   (*cur)++; **cur = NULL;
+      }
+    } 
+    /* also try one suffix rule: only the first rule listed for byte 'pi' */
+    newlen = strlen(newword);
+    ls = Conf->SuffixTree.Left[pi];
+      if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
+   **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
+   if (**cur) {
+     (*cur)++; **cur = NULL;
+   }
+      }
+  }
+  return 0;
+}
+
+
+/*
+ * Return the normal (dictionary) forms of 'word'.
+ *
+ * 'word' is lower-cased in place.  The result is a palloc'd array of
+ * palloc'd strings terminated by a NULL pointer and holding fewer than
+ * MAX_NORM forms.  Returns NULL when the word is empty, longer than
+ * MAXNORMLEN, or has no normal form.
+ */
+char ** 
+NormalizeWord(IspellDict * Conf,char *word){
+size_t len;
+char ** forms;
+char **cur;
+AFFIX * Affix;
+int ri, pi, ipi, lp, rp, cp, ls, rs;
+int lres, rres, cres = 0;
+  SPELL *spell;
+
+   len=strlen(word);
+   /*
+    * An empty word has no first/last byte to index the affix trees with:
+    * word[len-1] would read before the buffer, and a zero 'pi' would
+    * keep the "ipi += pi" loop below from ever terminating.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return(NULL);
+
+   strlower(word);
+
+   /* array of char pointers; the slot at 'cur' always holds the NULL end mark */
+   forms=(char **) palloc(MAX_NORM*sizeof(char *));
+   cur=forms;*cur=NULL;
+
+   ri = (int)(*word) & 255;                    /* first byte: prefix tree key */
+   pi = (int)(word[strlen(word)-1]) & 255;     /* last byte: suffix tree key */
+   Affix=(AFFIX*)Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if((spell = FindWord(Conf, word, 0))){
+       *cur=pstrdup(word);
+       cur++;*cur=NULL;
+   }
+
+   /* Find all other NORMAL forms of the 'word' */
+
+   /* two passes: suffix key 0 (rules with empty 'repl'), then key 'pi' */
+   for (ipi = 0; ipi <= pi; ipi += pi) {
+
+       /* check prefix */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp) {
+         /* try the middle and both ends of the remaining range */
+         cp = (lp + rp) >> 1;
+         cres = 0;
+         if ((cur - forms) < (MAX_NORM-1)) {
+       cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+         }
+         if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+       lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+         }
+         if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+       rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+         }
+         /* narrow the range using the comparison result of the middle rule */
+         if (cres < 0) {
+       rp = cp - 1;
+       lp++;
+         } else if (cres > 0) {
+       lp = cp + 1;
+       rp--;
+         } else {
+       lp++;
+       rp--;
+         }
+       }
+
+       /* check suffix */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       /* walk the whole range from both ends towards the middle */
+       while (ls >= 0 && ls <= rs) {
+         if (  ((cur - forms) < (MAX_NORM-1)) ) {
+       *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+       if (*cur) {
+         cur++; *cur = NULL;
+       }
+         }
+         if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+       *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+       if (*cur) {
+         cur++; *cur = NULL;
+       }
+         }
+         ls++;
+         rs--;
+       } /* end while */
+     
+   } /* for ipi */
+
+   if(cur==forms){
+       pfree(forms);
+       return(NULL);
+   }
+   return(forms);
+}
+
+/*
+ * Release everything owned by the dictionary and zero the structure.
+ *
+ * Frees the compiled regex of every affix rule that was compiled
+ * (compile == 0), every dictionary word, and the Affix and Spell arrays.
+ */
+void 
+FreeIspell (IspellDict *Conf) {
+  int i;
+  AFFIX *Affix = (AFFIX *)Conf->Affix;
+
+  for (i = 0; i < Conf->naffixes; i++) {
+    /* compile is cleared once regcomp() has succeeded for the rule */
+    if (Affix[i].compile == 0) {
+      regfree(&(Affix[i].reg));
+    }
+  }
+  /* the word list has nspell entries; it is not sized by naffixes */
+  for (i = 0; i < Conf->nspell; i++) {
+   free( Conf->Spell[i].word );
+  }
+  free(Conf->Affix);
+  free(Conf->Spell);
+  memset( (void*)Conf, 0, sizeof(IspellDict) );
+  return;
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+/* size_t and regex_t are used by the declarations below */
+#include <sys/types.h>
+#include <regex.h>
+
+/* one dictionary word together with its affix flags */
+typedef struct spell_struct {
+        char * word; 
+        char flag[10];
+} SPELL;
+
+/* one affix rule */
+typedef struct aff_struct {   
+        char flag;      /* flag the rule is looked up with (see FindWord calls) */
+        char type;      /* 'p' for a prefix rule; anything else is treated as suffix */
+        char mask[33];  /* regex source the rebuilt word must match */
+        char find[16];  /* text put back in place of 'repl' */
+        char repl[16];  /* affix text matched against the word */
+        regex_t reg;    /* compiled form of 'mask', valid once compile == 0 */
+        size_t replen;  /* number of bytes of 'repl' that are compared */
+        char compile;   /* non-zero while 'reg' still has to be compiled */
+} AFFIX;
+
+/* first (Left) and last (Right) array index per byte value, -1 if none */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+typedef struct {
+   int maffixes;       /* presumably the allocated capacity of Affix -- TODO confirm */
+   int naffixes;       /* number of rules in Affix */
+   AFFIX * Affix;
+
+   int nspell;         /* number of words in Spell */
+   int mspell;         /* presumably the allocated capacity of Spell -- TODO confirm */
+   SPELL   *Spell;
+   Tree_struct SpellTree;      /* index over Spell by first byte of the word */
+   Tree_struct PrefixTree;     /* index over prefix rules by first byte of repl */
+   Tree_struct SuffixTree;     /* index over suffix rules by last byte of repl */
+
+} IspellDict;
+
+char ** NormalizeWord(IspellDict * Conf,char *word);
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+/* states of the key=value parser in parse_cfgdict() */
+#define CS_WAITKEY 0       /* before the first letter of a key */
+#define CS_INKEY   1       /* inside a key */
+#define CS_WAITEQ  2       /* key finished, '=' not yet seen */
+#define CS_WAITVALUE   3   /* '=' seen, value not yet started */
+#define CS_INVALUE 4       /* inside a double-quoted value */
+#define CS_IN2VALUE    5   /* inside an unquoted value */
+#define CS_WAITDELIM   6   /* value finished, ',' not yet seen */
+#define CS_INESC   7       /* after a backslash; returns to CS_INVALUE */
+#define CS_IN2ESC  8       /* after a backslash; returns to CS_IN2VALUE */
+
+/*
+ * Return a palloc'd, NUL-terminated copy of the first 'len' bytes of 'ptr'
+ * with backslash escapes removed: each backslash is dropped and the byte
+ * after it is copied literally.  A lone backslash at the very end is
+ * dropped.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+   char *res=palloc(len+1), *cptr;
+   memcpy(res,ptr,len);
+   res[len]='\0';
+   /* unescape in place: 'ptr' reads, 'cptr' writes, both inside 'res' */
+   cptr = ptr = res;
+   while(*ptr) {
+       if ( *ptr == '\\' ) {
+           ptr++;
+           /* nothing follows the backslash: stop before stepping past the terminator */
+           if ( *ptr == '\0' )
+               break;
+       }
+       *cptr=*ptr; ptr++; cptr++;
+   }
+   *cptr='\0';
+
+   return res;
+}
+
+/*
+ * Parse a text of the form  key = value [, key = value ...]  into an array
+ * of Map entries.
+ *
+ * Keys consist of alphabetic characters.  A value is either enclosed in
+ * double quotes or runs up to the next whitespace or comma; a backslash
+ * escapes the following character in both forms.  *m receives a palloc'd
+ * array that is zero-filled, so the entry after the last parsed pair is
+ * all zeroes.  Syntax problems are reported with elog(ERROR).
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+   Map *mptr;
+   char *ptr=VARDATA(in), *begin=NULL;
+   int num=0;      /* int, so that many commas cannot overflow the counter */
+   int state=CS_WAITKEY;
+
+   /* every pair but the last is followed by a comma: commas+1 bounds the pairs */
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if ( *ptr==',' ) num++;
+       ptr++;
+   }
+
+   /* one extra zeroed entry stays behind the last pair */
+   *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+   memset(mptr, 0, sizeof(Map)*(num+2) );
+   ptr=VARDATA(in);
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       /* ctype functions need an unsigned char value, hence the casts */
+       if (state==CS_WAITKEY) {
+           if (isalpha((unsigned char) *ptr)) {
+               begin=ptr;
+               state=CS_INKEY;
+           } else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if (state==CS_INKEY) {
+           if ( isspace((unsigned char) *ptr) ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITEQ;
+           } else if ( *ptr=='=' ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITVALUE;
+           } else if ( !isalpha((unsigned char) *ptr) ) 
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITEQ ) {
+           if ( *ptr=='=' )
+               state=CS_WAITVALUE;
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITVALUE ) {
+           if ( *ptr=='"' ) {
+               begin=ptr+1;
+               state=CS_INVALUE;
+           } else if ( !isspace((unsigned char) *ptr) ) {
+               begin=ptr;
+               state=CS_IN2VALUE;
+           }
+       } else if ( state==CS_INVALUE ) {
+           if ( *ptr=='"' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_INESC;
+       } else if ( state==CS_IN2VALUE ) {
+           if ( isspace((unsigned char) *ptr) || *ptr==',' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               /* escape inside an unquoted value must come back to CS_IN2VALUE */
+               state=CS_IN2ESC;
+       } else if ( state==CS_WAITDELIM ) {
+           if ( *ptr==',' ) 
+               state=CS_WAITKEY; 
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state == CS_INESC ) {
+           /* the escaped character is skipped here; nstrdup() unescapes it later */
+           state=CS_INVALUE;
+       } else if ( state == CS_IN2ESC ) {
+           state=CS_IN2VALUE;
+       } else 
+           elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int) (ptr-VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may simply run to the end of the input */
+   if (state==CS_IN2VALUE) {
+       mptr->value = nstrdup(begin, ptr-begin);
+       mptr++;
+   } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) ) 
+       elog(ERROR,"Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * I/O definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery input is parsed with the text parser
+ * and morphology is applied as well.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored flat: in the array of nodes the
+ * right child is always the next item and the left child is at item+item->left
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+/* function-manager declarations for the SQL-callable functions of this file */
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+/* token types returned by gettoken_query() and stored in NODE/ITEM type */
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR   2
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ */
+typedef struct NODE
+{
+   int2        weight;     /* weight bit mask as built by get_weight() */
+   int2        type;       /* token type (VAL, OPR, ...) */
+   int4        val;        /* operator character, or crc32 of the operand */
+   int2        distance;   /* offset of the operand text in QPRS_STATE.op */
+   int2        length;     /* length of the operand text */
+   struct NODE *next;      /* previously pushed node */
+}  NODE;
+
+typedef struct
+{
+   char       *buf;        /* current position in the query string */
+   int4        state;      /* WAITOPERAND or WAITOPERATOR */
+   int4        count;      /* number of currently open parentheses */
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   int4        lenop;      /* allocated size of op */
+   int4        sumlen;     /* bytes stored in op, including the '\0' after each operand */
+   char       *op;         /* buffer holding the operand strings */
+   char       *curop;      /* write position inside op */
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional weight suffix of the form ":abcd".
+ *
+ * *weight is set to 0, or to a bit mask when 'buf' starts with ':':
+ * a -> bit 3, b -> bit 2, c -> bit 1, d -> bit 0 (letters in either case).
+ * Returns the position of the first character that is not part of the
+ * suffix ('buf' itself when there is no ':').
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+   *weight = 0;
+
+   if ( *buf != ':' )
+       return buf;
+
+   buf++;
+   while( *buf ) {
+       /* tolower() is only defined for unsigned char values (and EOF) */
+       switch(tolower((unsigned char) *buf)) {
+           case 'a': *weight |= 1<<3; break; 
+           case 'b': *weight |= 1<<2; break; 
+           case 'c': *weight |= 1<<1; break; 
+           case 'd': *weight |= 1;    break;
+           default: return buf; 
+       }
+       buf++;
+   }
+   
+   return buf;
+}
+
+/*
+ * get token from query string
+ *
+ * Returns the token type.  For OPR, *val holds the operator character.
+ * For VAL, *strval / *lenval describe the operand text produced by
+ * gettoken_tsvector() and *weight the mask parsed by get_weight().
+ * state->buf is advanced past the consumed input and state->count tracks
+ * the number of open parentheses.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               if (*(state->buf) == '!')
+               {
+                   /* negation: stay in WAITOPERAND, the operand follows */
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* hand the input over to the value parser */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       /* continue after the value and its optional weight suffix */
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   /* more ')' than '(' is an error */
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* end of input with parentheses still open is an error */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       /* reached only for a skipped space */
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Prepend a new node to the parser's list, which holds the query in
+ * reverse polish notation.  Raises an error when 'distance' or 'lenval'
+ * reach MAXSTRPOS / MAXSTRLEN.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node;
+
+   node = (NODE *) palloc(sizeof(NODE));
+   node->weight = weight;
+   node->type = type;
+   node->val = val;
+
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+
+   node->distance = distance;
+   node->length = lenval;
+
+   /* the newest node becomes the head of the list */
+   node->next = state->str;
+   state->str = node;
+   state->num += 1;
+}
+
+/*
+ * Operand callback used for plain tsquery parsing: push the operand as a
+ * node and append its text, followed by '\0', to the operand buffer.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   int4        used;
+
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   /* offset at which this operand will be stored */
+   used = state->curop - state->op;
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             used, lenval, weight);
+
+   /* double the buffer until the operand and its terminator fit */
+   while (used + lenval + 1 >= state->lenop)
+   {
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+   }
+   state->curop = state->op + used;
+
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop[lenval] = '\0';
+   state->curop += lenval + 1;
+   state->sumlen += lenval + 1;
+}
+
+/*
+ * Operand callback used for morph parsing: the operand text is run through
+ * parsetext_v2() with the configuration state->cfg_id, and every resulting
+ * word is pushed as a VAL node; from the second word on an '&' operator is
+ * pushed as well.  If parsing yields no words at all, a VALTRUE placeholder
+ * is pushed instead.
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT         prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   /* one VAL per parsed word; the word text is released after the push */
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }   
+   pfree(prs.words);
+
+   /* XXX */
+   /* nothing left of the operand (VALTRUE is "for stop words") */
+   if ( prs.curwords==0 ) 
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+/* capacity of the pending-operator stack in each makepol() invocation */
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens with gettoken_query() and pushes nodes through pushquery()
+ * and the 'pushval' callback.  Operators wait on a local stack until
+ * their operand has been pushed.  Recurses for every '('; a recursive
+ * call returns when it meets the matching ')'.  Returns END on success;
+ * syntax errors are raised with elog(ERROR).
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* emit every '&' and '!' waiting on top of the stack */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               /* a '|' is emitted at once when operators are already waiting */
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* parenthesised sub-expression */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               /* unlike the VAL case, at most one waiting operator is emitted */
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* flush everything still waiting before leaving this level */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush the operators that are still waiting */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/* context handed to checkcondition_str() while executing a query */
+typedef struct
+{
+   WordEntry  *arrb;       /* first entry of the tsvector's word array */
+   WordEntry  *arre;       /* one past its last entry */
+   char       *values;     /* tsvector string area that WordEntry.pos indexes */
+   char       *operand;    /* query operand area that ITEM.distance indexes */
+}  CHKVAL;
+
+/*
+ * Order a tsvector entry against a query operand: shorter strings sort
+ * first, equal lengths are decided by strncmp().  Returns <0, 0 or >0.
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   const char *lexeme;
+   const char *operand;
+
+   if (ptr->len > item->length)
+       return 1;
+   if (ptr->len < item->length)
+       return -1;
+
+   /* same length: compare the bytes */
+   lexeme = &(chkval->values[ptr->pos]);
+   operand = &(chkval->operand[item->distance]);
+   return strncmp(lexeme, operand, item->length);
+}
+
+/*
+ * check weight info
+ *
+ * Returns true when at least one position stored for the entry 'val' has
+ * a weight whose bit is set in item->weight.  The position data lives in
+ * chkval->values, short-aligned after the word text: a uint16 count
+ * followed by that many WordEntryPos items.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       /* the position's weight selects one bit of the requested mask */
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false; 
+}
+
+/*
+ * Binary-search the tsvector's word array for the operand 'val'.
+ * Returns false when it is absent.  When it is present the result is true,
+ * unless the operand carries a weight mask and the entry has positions,
+ * in which case checkclass_str() decides.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *chk = (CHKVAL *) checkval;
+   WordEntry  *lo = chk->arrb;
+   WordEntry  *hi = chk->arre;
+
+   /* search the half-open range [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = ValCompare(chk, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+       {
+           if (val->weight && mid->haspos)
+               return checkclass_str(chk, mid, val);
+           return true;
+       }
+   }
+
+   return (false);
+}
+
+/*
+ * Evaluate the query tree rooted at 'curitem'.
+ *
+ * VAL items are decided by the callback 'chkcond'.  For an operator the
+ * right operand is the next item and the left operand is at
+ * curitem + curitem->left.  When 'calcnot' is false, a '!' node is
+ * treated as true without evaluating its operand.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   bool        leftmatch;
+
+   /* leaf: ask the caller-supplied predicate */
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       if (TS_execute(curitem + 1, checkval, calcnot, chkcond))
+           return false;
+       return true;
+   }
+
+   /* binary operator: the left operand is evaluated first */
+   leftmatch = TS_execute(curitem + curitem->left, checkval, calcnot, chkcond);
+   if (curitem->val == (int4) '&')
+   {
+       if (!leftmatch)
+           return false;
+   }
+   else if (leftmatch)         /* |-operator */
+       return true;
+
+   return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+}
+
+/*
+ * boolean operations
+ *
+ * rexectsq is exectsq with its two arguments swapped.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(1);
+   Datum       second = PG_GETARG_DATUM(0);
+
+   return DirectFunctionCall2(exectsq, first, second);
+}
+
+/*
+ * Match operator: argument 0 is the tsvector, argument 1 the query.
+ * Returns false when either of them is empty, otherwise the result of
+ * executing the query against the vector with NOT evaluation enabled.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   bool        result = false;
+
+   if (val->size && query->size)
+   {
+       CHKVAL      chkval;
+
+       chkval.arrb = ARRPTR(val);
+       chkval.arre = chkval.arrb + val->size;
+       chkval.values = STRPTR(val);
+       chkval.operand = GETOPERAND(query);
+       result = TS_execute(GETQUERY(query), &chkval, true, checkcondition_str);
+   }
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * find left operand in polish notation view
+ *
+ * Walks the sub-tree starting at ptr[*pos], fills in the 'left' offset of
+ * every item in it, and leaves *pos on the first item after the sub-tree.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+   if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
+   {
+       /* leaf: no operand */
+       ptr[*pos].left = 0;
+       (*pos)++;
+   }
+   else if (ptr[*pos].val == (int4) '!')
+   {
+       /* unary operator: its only operand is the next item */
+       ptr[*pos].left = 1;
+       (*pos)++;
+       findoprnd(ptr, pos);
+   }
+   else
+   {
+       ITEM       *curitem = &ptr[*pos];
+       int4        tmp = *pos;
+
+       /* binary operator: the right operand's sub-tree comes first ... */
+       (*pos)++;
+       findoprnd(ptr, pos);
+       /* ... and the left operand starts right after it */
+       curitem->left = *pos - tmp;
+       findoprnd(ptr, pos);
+   }
+}
+
+
+/*
+ * input
+ *
+ * Parse the query string 'buf' into a newly palloc'd QUERYTYPE.
+ *  pushval - callback that pushes one operand (pushval_asis or
+ *            pushval_morph)
+ *  cfg_id  - configuration id stored in the parser state for the callback
+ * Raises an error for an empty query.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   /* the list is walked from its head, i.e. from the node pushed last */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * Input function for tsquery: operands are stored as written, without
+ * morphology, and configuration id 0 is passed to the parser.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   char       *input = (char *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *parsed = queryin(input, pushval_asis, 0);
+
+   PG_RETURN_POINTER(parsed);
+}
+
+/*
+ * out function
+ *
+ * INFIX is the working state of infix(): a read cursor over the query
+ * items plus a growable output buffer.
+ */
+typedef struct
+{
+   ITEM       *curpol;     /* next query item to print */
+   char       *buf;        /* start of the output buffer */
+   char       *cur;        /* write position inside buf */
+   char       *op;         /* operand area of the query */
+   int4        buflen;     /* allocated size of buf */
+}  INFIX;
+
+/*
+ * Double inf->buf until 'addsize' more bytes plus a terminator fit after
+ * inf->cur; inf->cur is re-based on the reallocated buffer.
+ */
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the sub-tree at in->curpol into in->buf and advances in->curpol
+ * past it.  'first' suppresses the parentheses around an '|' expression.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       /* every char may need a backslash; +2 for the quotes, +5 for ":ABCD" */
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           /* single quotes inside the operand are backslash-escaped */
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       /* weight mask is printed back as the ":ABCD" suffix */
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       /* a negated operator expression is wrapped in parentheses */
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm;
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       /* the right operand is stored first: print it into a scratch buffer */
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function for tsquery: an empty query gives an empty string,
+ * anything else is printed by infix().
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       out;
+
+   if (query->size == 0)
+   {
+       char       *empty = palloc(1);
+
+       empty[0] = '\0';
+       PG_RETURN_POINTER(empty);
+   }
+
+   /* start with a small buffer; infix() grows it as needed */
+   out.buflen = 32;
+   out.buf = (char *) palloc(sizeof(char) * out.buflen);
+   out.cur = out.buf;
+   out.cur[0] = '\0';
+   out.curpol = GETQUERY(query);
+   out.op = GETOPERAND(query);
+   infix(&out, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(out.buf);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * Returns a text value: empty for an empty query, "T" when
+ * clean_NOT_v2() returns NULL, otherwise the infix form of the query
+ * that clean_NOT_v2() returned.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+   text       *res;
+   ITEM       *q;
+   int4        len;
+
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       PG_RETURN_POINTER(res);
+   }
+
+   q = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (!q)
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+   else
+   {
+       nrm.curpol = q;
+       nrm.buflen = 32;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+       *(nrm.cur) = '\0';
+       nrm.op = GETOPERAND(query);
+       infix(&nrm, true);
+
+       /* copy the printed text without its terminator into the text value */
+       res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ);
+       VARATT_SIZEP(res) = nrm.cur - nrm.buf + VARHDRSZ;
+       strncpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf);
+       pfree(q);
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * Build a query from text with morphology applied.
+ * Argument 0 is the configuration id (int32), argument 1 the query text.
+ * The parsed query is passed through clean_fakeval_v2(); when that
+ * returns NULL an empty query is returned.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text    *in = PG_GETARG_TEXT_P(1);
+   char *str;
+   QUERYTYPE  *query;
+   ITEM       *res;
+   int4        len;
+
+   str=text2char(in);
+   PG_FREE_IF_COPY(in,1);
+
+   query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+   res = clean_fakeval_v2(GETQUERY(query), &len);
+   if (!res)
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+       PG_RETURN_POINTER(query);
+   }
+   /*
+    * NOTE(review): 'len' items are copied back, but query->size and
+    * query->len keep their values from queryin() -- confirm whether
+    * clean_fakeval_v2() can return fewer items than query->size.
+    */
+   memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM));
+   pfree(res);
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * to_tsquery() with the configuration given by name: argument 0 is the
+ * configuration name (text), argument 1 the query text.  The name is
+ * resolved with name2id_cfg() and the call is forwarded to to_tsquery().
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+   
+   /* 'name' was fetched from argument 0, so that is the slot to compare against */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsquery() using the configuration returned by get_currcfg();
+ * argument 0 is the query text.
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum cfg = Int32GetDatum( get_currcfg() );
+   Datum query = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));
+
+   PG_RETURN_DATUM( query );
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * item in polish notation with back link
+ * to left operand
+ *
+ * type   - one of the VAL/OPR/... constants defined below
+ * weight - NOTE(review): not used by the code visible here; presumably a
+ *          weight restriction for the operand - confirm
+ * left   - for a binary operator, offset (in items) from the operator to
+ *          its left operand; the right operand is the next item
+ * val    - for operators, the operator character ('!', '&', '|')
+ * length, distance - length of the operand string and its offset inside
+ *          the operand area that follows the item array (see GETOPERAND)
+ */
+typedef struct ITEM
+{
+   int8        type;
+   int8        weight;
+   int2        left;
+   int4        val;
+   /* user-friendly value, must correlate with WordEntry */
+   uint32  
+       unused:1,
+       length:11,
+       distance:20;
+}  ITEM;
+
+/*
+ * Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ *
+ * size is the number of ITEMs in the array (GETOPERAND skips
+ * size * sizeof(ITEM) bytes); an empty query has len == HDRSIZEQT and
+ * size == 0.  data[] marks the start of the variable-length part.
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  QUERYTYPE;
+
+/*
+ * Layout helpers for QUERYTYPE.  Every macro argument and every macro
+ * result is parenthesised so that expression arguments (e.g. a+b) and
+ * postfix use of the result (e.g. GETQUERY(q)[i]) bind as expected.
+ */
+/* size in bytes of the fixed header (len + size) */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+/* total bytes needed for 'size' items plus 'lenofoperand' operand bytes */
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+/* pointer to the first ITEM of query x */
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+/* pointer to the operand string area that follows the item array */
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevance ranking
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+#define DEF_NORM_METHOD    0
+
+/*
+ * Weight of a word collocation as a function of the distance w between
+ * two positions: a decaying curve for w <= 100, and a tiny constant
+ * beyond that.
+ */
+static float4 word_distance ( int4 w ) {
+   if ( w <= 100 )
+       return 1.0/(1.005+0.05*exp( ((float4)w)/1.5-2) );
+
+   return 1e-30;
+}
+
+/*
+ * Length of a tsvector for normalization purposes: the sum over all
+ * entries of POSDATALEN, where an entry reporting zero positions still
+ * counts as one.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry  *entry;
+   WordEntry  *stop = (WordEntry*)STRPTR(t);
+   int         total = 0;
+
+   for (entry = ARRPTR(t); entry < stop; entry++) {
+       int     npos = POSDATALEN(t, entry);
+
+       total += (npos == 0) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Compare a tsvector entry with a query operand.  Strings of different
+ * length order by length; equal-length strings order by strncmp.
+ * eval and qval are the bases of the two string areas.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+        if (ptr->len != item->length)
+                return (ptr->len > item->length) ? 1 : -1;
+
+        return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary search for the entry of t matching query operand item,
+ * using WordECompareITEM ordering.  Returns NULL when there is no match.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+        WordEntry  *lo = ARRPTR(t);
+        WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+        /* the match, if any, is always inside [lo, hi) */
+        while (lo < hi)
+        {
+                WordEntry  *mid = lo + (hi - lo) / 2;
+                int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+                if (cmp > 0)
+                        hi = mid;
+                else if (cmp < 0)
+                        lo = mid + 1;
+                else
+                        return mid;
+        }
+
+        return NULL;
+}
+
+/*
+ * Placeholder position list used for entries stored without positions.
+ * Users overwrite the first element (viewed as uint16) with
+ * lengthof(POSNULL)-1 and then read the array as "count followed by
+ * entries", so POSNULL+1 is the single placeholder entry.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank used when the first query item is the '&' operator (see calc_rank).
+ *
+ * For each query operand found in the document, its positions are paired
+ * with the positions of every earlier operand that was also found.  Each
+ * pair contributes
+ *     sqrt( weight_a * weight_b * word_distance(|pos_a - pos_b|) )
+ * and contributions are folded as res = 1 - (1 - res) * (1 - cur).
+ * Pairs at distance 0 are skipped unless one side is the POSNULL
+ * placeholder, in which case the distance is taken as MAXENTRYPOS.
+ *
+ * w is the weight table read through the wpos() macro.
+ * Returns -1.0 when no pair contributed.
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* pos[i] stays NULL for operators and for operands not in the document */
+   memset(pos,0,sizeof(uint16*) * q->size);
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+
+       /* position lists are a uint16 count followed by the entries */
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       for( k=0; k<i; k++ ) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw;
+                       if ( !dist ) dist=MAXENTRYPOS;
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res;
+}
+
+/*
+ * Rank used when the first query item is not the '&' operator
+ * (see calc_rank).
+ *
+ * Every position of every query operand found in the document contributes
+ * its weight (looked up in w through wpos()); contributions are folded as
+ * res = 1 - (1 - res) * (1 - weight).  Operands stored without positions
+ * contribute the single POSNULL placeholder entry.
+ *
+ * Returns -1.0 when no operand was found.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0; j<dimt; j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Rank document t against query q with weight table w.
+ *
+ * A query whose first item is the '&' operator is scored by
+ * calc_rank_and(), anything else by calc_rank_or(); a negative score
+ * (nothing matched) is replaced by 1e-20.  method selects normalization:
+ * 0 = none, 1 = divide by log(document length), 2 = divide by document
+ * length; any other value raises an error.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM   *root = GETQUERY(q);
+   float   score=0.0;
+
+   if (t->size == 0 || q->size == 0)
+       return 0.0;
+
+   if ( root->type != VAL && root->val == (int4) '&' )
+       score = calc_rank_and(w,t,q);
+   else
+       score = calc_rank_or(w,t,q);
+
+   if ( score < 0 )
+       score = 1e-20;
+
+   if ( method == 1 )
+       score /= log((float)cnt_length(t));
+   else if ( method == 2 )
+       score /= (float)cnt_length(t);
+   else if ( method != 0 )
+       elog(ERROR,"Unknown normalization method: %d",method);
+
+   return score;
+}
+
+/*
+ * rank(weights float4[], tsvector, query [, method int4])
+ *
+ * The weight array must be one-dimensional with at least
+ * lengthof(weights) elements.  A negative element selects the built-in
+ * default for that slot; an effective weight above 1.0 is an error.
+ * The optional fourth argument is the normalization method passed to
+ * calc_rank().
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 )
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+        elog(ERROR,"Array of weight is too short");
+
+   /* copy user weights, falling back to defaults for negative entries */
+   for(i=0; i<lengthof(weights); i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 )
+           elog(ERROR,"Weight out of range");
+   }
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method);
+
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank(tsvector, query [, method int4]) using the built-in default
+ * weight table.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *qry = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int norm = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   float score = calc_rank(weights, vec, qry, norm);
+
+   PG_FREE_IF_COPY(vec, 0);
+   PG_FREE_IF_COPY(qry, 1);
+   PG_RETURN_FLOAT4(score);
+}
+
+
+/* One occurrence of a query operand in the document. */
+typedef struct {
+   ITEM    *item;      /* the query operand that occurred */
+   int32   pos;        /* position of the occurrence */
+} DocRepresentation;
+
+/*
+ * qsort comparator ordering DocRepresentation entries by position.
+ * Equal positions compare as 0: a comparator must be consistent, i.e.
+ * it may not claim both a > b and b > a for the same pair.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+/* A window of len consecutive DocRepresentation entries starting at doc. */
+typedef struct {
+   DocRepresentation *doc;
+   int len;
+}  ChkDocR;
+
+/*
+ * TS_execute callback: true when operand val occurs anywhere in the
+ * window described by checkval (a ChkDocR).
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *window = (ChkDocR*)checkval;
+   int     idx;
+
+   for (idx = 0; idx < window->len; idx++) {
+       if ( window->doc[idx].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next cover in the position-sorted array doc[0..len-1],
+ * searching from index *pos.
+ *
+ * Forward pass: for every query operand, locate its first occurrence at
+ * or after doc[*pos]; *q becomes the largest position among those and
+ * lastpos the index of that occurrence.
+ * Backward pass: for every operand, locate its nearest occurrence walking
+ * back from lastpos down to *pos; *p becomes the smallest position among
+ * those and f the corresponding entry.
+ * The window [f, doc+lastpos] is then tested with TS_execute; on failure
+ * the search restarts just after f.
+ *
+ * On entry *q must hold the right edge of the previously returned cover
+ * (0 initially); it is used to avoid returning the same cover twice.
+ * Returns true with *p / *q set to the cover's first / last position and
+ * *pos advanced past its start; returns false when no cover is left.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   /* f is only read after the backward pass has assigned it (*p<=*q) */
+   DocRepresentation   *ptr,*f=NULL;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               }
+               break;
+           }
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   }
+
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+
+   if ( *p<=*q ) {
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q);
+   }
+
+   return false;
+}
+
+/*
+ * Build the list of all occurrences in txt of the operands of query,
+ * sorted by position (compareDocR).  Operands stored without positions
+ * contribute the single POSNULL placeholder entry.
+ *
+ * *doclen receives the number of occurrences.  Returns a palloc'd array,
+ * or NULL (nothing allocated is left behind) when there are none.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow by doubling until the new occurrences fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0; j<dimt; j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+
+   if ( cur>0 ) {
+       if ( cur>1 )
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd(K int4, tsvector, query [, method int4])
+ *
+ * Sums a score over all covers reported by Cover(): a cover spanning at
+ * most K positions scores 1, a longer one scores K / span.  K <= 0
+ * selects the default of 4.  The optional fourth argument selects the
+ * normalization (0 none, 1 divide by log of document length, 2 divide by
+ * document length).  Returns 0 when no query operand occurs in the
+ * document.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   DocRepresentation   *doc;
+   int norm=DEF_NORM_METHOD;
+   float   score=0.0;
+   int lo=0,hi=0,doclen,cursor=0;
+
+   doc = get_docrep(txt, query, &doclen);
+   if ( doc == NULL ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   if ( K <= 0 )
+       K=4;
+
+   while( Cover(doc, doclen, query, &cursor, &lo, &hi) )
+       score += ( hi-lo+1 > K ) ? ((float)K)/((float)(hi-lo+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       norm=PG_GETARG_INT32(3);
+
+   if ( norm == 1 )
+       score /= log((float)cnt_length(txt));
+   else if ( norm == 2 )
+       score /= (float)cnt_length(txt);
+   else if ( norm != 0 )
+       elog(ERROR,"Unknown normalization method: %d",norm);
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(score);
+}
+
+
+/*
+ * rank_cd(tsvector, query [, method int4]): calls rank_cd with K = -1
+ * (which makes it use its default) and the default normalization method
+ * when none is given.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum   norm = ( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD);
+   Datum   score = DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       norm
+   );
+
+   PG_RETURN_DATUM(score);
+}
+
+/**************debug*************/
+
+/* One word occurrence of the document, used by get_covers(). */
+typedef struct {
+   char    *w;         /* word text (not NUL-terminated) */
+   int2    len;        /* length of w */
+   int2    pos;        /* position in the document */
+   int2    start;      /* number of the cover starting here, 0 if none */
+   int2    finish;     /* number of the cover ending here, 0 if none */
+} DocWord;
+
+/*
+ * qsort comparator ordering DocWord entries by position.  Equal
+ * positions compare as 0 so that the comparator is consistent.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * Debug helper: get_covers(tsvector, query) returns the document's words
+ * in position order as text, with each cover found by Cover() bracketed
+ * as "{N ... }N" where N is the cover's ordinal (starting at 1).
+ *
+ * Returns empty text when no query operand occurs in the document, and
+ * raises an error when any tsvector entry lacks position information.
+ */
+Datum
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count word occurrences; every entry must carry positions */
+   for(i=0; i<txt->size; i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+        dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* expand entries into one DocWord per occurrence; len sizes the output */
+   for(i=0; i<txt->size; i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0; j<POSDATALEN(txt,&(pptr[i])); j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;
+           dw[cur].len=pptr[i].len;
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark the first and last word of each cover */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* bounds are checked before dwptr is dereferenced */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++;
+   }
+
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) {
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/* Node of the pointer-linked form of a query. */
+typedef struct NODE
+{
+   struct NODE *left;
+   struct NODE *right;
+   ITEM       *valnode;
+}  NODE;
+
+/*
+ * Build a tree from the plain (array) form of a query.
+ * For an operator, the right operand is the next item; every operator
+ * except '!' also has a left operand at offset in->left.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *root = (NODE *) palloc(sizeof(NODE));
+
+   root->left = NULL;
+   root->right = NULL;
+   root->valnode = in;
+
+   if (in->type != OPR)
+       return root;
+
+   root->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       root->left = maketree(in + in->left);
+
+   return root;
+}
+
+/* Growable output buffer used while flattening a tree. */
+typedef struct
+{
+   ITEM       *ptr;
+   int4        len;
+   int4        cur;
+}  PLAINTREE;
+
+/*
+ * Append node and its subtree to state in plain order (operator, right
+ * subtree, left subtree), recomputing each operator's left offset, and
+ * free the tree nodes on the way.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   int4        self;
+
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+
+   self = state->cur;
+   memcpy((void *) &(state->ptr[self]), (void *) node->valnode, sizeof(ITEM));
+   state->cur++;
+
+   if (node->valnode->type != VAL)
+   {
+       if (node->valnode->val == (int4) '!')
+       {
+           state->ptr[self].left = 1;
+           plainnode(state, node->right);
+       }
+       else
+       {
+           plainnode(state, node->right);
+           /* state->ptr may have moved during recursion; index it afresh */
+           state->ptr[self].left = state->cur - self;
+           plainnode(state, node->left);
+       }
+   }
+
+   pfree(node);
+}
+
+/*
+ * Flatten a tree back into the plain (array) form.
+ * Returns a palloc'd item array and stores its length in *len; returns
+ * NULL with *len == 0 when root is NULL or is neither a value nor an
+ * operator.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   pl;
+
+   pl.cur = 0;
+   pl.len = 16;
+   pl.ptr = NULL;
+
+   if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
+       plainnode(&pl, root);
+   }
+
+   *len = pl.cur;
+   return pl.ptr;
+}
+
+/* Recursively free a tree; a NULL node is accepted. */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive call itself ignores NULL children */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Clean the tree of the ! operator.
+ * This is useful for debugging; otherwise such a view is used when
+ * searching in an index.
+ * Operator ! is treated as always returning TRUE.
+ *
+ * Returns the cleaned subtree, or NULL when the whole subtree was
+ * dropped; dropped nodes are freed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* an OR with a dropped branch is dropped entirely */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       /* an AND collapses to whichever branch survived, if any */
+       NODE       *res = node;
+
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a new plain item array equal to the query at ptr with the
+ * ! operators cleaned out (see clean_NOT_intree); *len receives its
+ * length.  Returns NULL when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *cleaned = clean_NOT_intree(maketree(ptr));
+
+   return plaintree(cleaned, len);
+}
+
+/* constant truth value of a removed subtree, reported through *result */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Clean query tree from values which is always in
+ * text (stopword)
+ *
+ * Returns the simplified subtree, or NULL when the subtree reduced to a
+ * constant; in that case *result is set to V_TRUE or V_FALSE.  *result
+ * is left untouched when a subtree is returned.  Removed nodes are freed.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       /* negation of a constant is the opposite constant */
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       /* OR: TRUE absorbs everything, FALSE branches are dropped */
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       /* AND: FALSE absorbs everything, TRUE branches are dropped */
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a new plain item array equal to the query at ptr with VALTRUE
+ * items simplified away (see clean_fakeval_intree); *len receives its
+ * length.  When the whole query reduces to a constant, a NOTICE is
+ * emitted, *len is set to 0 and NULL is returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        status = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &status);
+
+   if (status == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/* qsort/bsearch comparator: orders SNMapEntry items by key (strcmp). */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   char   *lkey = ((SNMapEntry*)a)->key;
+   char   *rkey = ((SNMapEntry*)b)->key;
+
+   return strcmp( lkey, rkey );
+}
+
+/*
+ * Add (key, value) to the map.  The key is duplicated with strdup; the
+ * entry array grows by doubling (starting at 16) and is re-sorted after
+ * every insertion so that findSNMap can use bsearch.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if (map->len>=map->reallen) {
+       int newcap = (map->reallen) ? 2*map->reallen : 16;
+       SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newcap);
+
+       if ( grown == NULL )
+           elog(ERROR, "No memory");
+       map->reallen=newcap;
+       map->list=grown;
+   }
+
+   slot = &( map->list[ map->len ] );
+   slot->key = strdup(key);
+   if ( slot->key == NULL )
+       elog(ERROR, "No memory");
+   slot->value=value;
+   map->len++;
+
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/* addSNMap for a text key: converts with text2char, adds, frees the copy. */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey = text2char( key );
+
+   addSNMap(map, ckey, value);
+   pfree(ckey);
+}
+
+/*
+ * Look up key in the map (binary search).  Returns the stored value, or
+ * 0 when the map is empty or the key is absent.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry  probe = {key, 0};
+   SNMapEntry *hit;
+
+   if ( map->len==0 || !map->list )
+       return 0;
+
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( hit == NULL )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * findSNMap for a text key: converts with text2char, looks up, frees the
+ * copy.  The result is kept in an Oid so it is not routed through a
+ * signed int on the way out.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/* Free every key and the entry array, then zero the map structure. */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *cursor;
+
+       for ( cursor = map->list; map->len; cursor++, map->len-- ) {
+           if ( cursor->key )
+               free(cursor->key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One map entry: a malloc'd (strdup) key string and its Oid value. */
+typedef struct {
+   char    *key;
+   Oid value;
+} SNMapEntry;
+
+/*
+ * Map from string to Oid.
+ * len is the number of entries in use, reallen the allocated capacity of
+ * list; list is kept sorted by key (addSNMap re-sorts after each insert).
+ */
+typedef struct {
+   int len;
+   int reallen;
+   SNMapEntry  *list;
+} SNMap;
+
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a zeroed stemmer environment with S_size symbol strings,
+ * I_size ints and B_size symbols; each array is only allocated when its
+ * size is non-zero.  Allocation results are not checked.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{
+    struct SN_env * env = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    int idx;
+
+    env->p = create_s();
+
+    if (S_size != 0)
+    {
+        env->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        for (idx = 0; idx < S_size; idx++)
+            env->S[idx] = create_s();
+        env->S_size = S_size;
+    }
+
+    if (I_size != 0)
+    {
+        env->I = (int *) calloc(I_size, sizeof(int));
+        env->I_size = I_size;
+    }
+
+    if (B_size != 0)
+    {
+        env->B = (symbol *) calloc(B_size, sizeof(symbol));
+        env->B_size = B_size;
+    }
+
+    return env;
+}
+
+/* Release everything allocated by SN_create_env, then the environment. */
+extern void SN_close_env(struct SN_env * z)
+{
+    int idx;
+
+    if (z->S_size != 0)
+    {
+        for (idx = 0; idx < z->S_size; idx++)
+            lose_s(z->S[idx]);
+        free(z->S);
+    }
+    if (z->I_size != 0)
+        free(z->I);
+    if (z->B_size != 0)
+        free(z->B);
+    if (z->p != NULL)
+        lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Load the size symbols at s as the environment's current string:
+ * replaces the range [0, z->l) via replace_s and resets z->c to 0.
+ * NOTE(review): z->l / z->c are presumably the current limit and cursor -
+ * confirm against header.h.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+struct SN_env {
+    symbol * p;
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;
+    symbol * * S;
+    int * I;
+    symbol * B;
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] = /* Prefix table searched forwards by r_mark_regions(): when "gener"
+   matches, the p1 mark is placed right after it and the first vowel/non-vowel scan is skipped.
+   Row layout: { length, string, substring_i, result, function }. */
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] = /* Suffix table searched backwards by r_Step_1a().  Results: 1 = "sses",
+   2 = "ied"/"ies", 3 = bare "s"; -1 ("ss", "us") has no case in that switch, so the word is kept as is.
+   Row layout: { length, string, substring_i, result, function }. */
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] = /* Second lookup of r_Step_1b(), applied to what is left after the
+   suffix was deleted.  Results: 1 ("bl", "at", "iz") = append 'e'; 2 (doubled letters) = drop the last
+   character; 3 = row 0, the empty string that every other row names as its substring_i. */
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] = /* First lookup of r_Step_1b(), searched backwards.  Result 1 ("eed",
+   "eedly") is rewritten to "ee"; result 2 ("ed", "ing", "edly", "ingly") is deleted. */
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] = /* Suffix table searched backwards by r_Step_2(); the result column
+   (1..16) selects the case of that function's switch.  Rows that name another row in substring_i
+   extend that shorter suffix (for example "bli" extends "li"). */
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] = /* Suffix table searched backwards by r_Step_3(); the result column
+   (1..6) selects the case of that function's switch. */
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] = /* Suffix table searched backwards by r_Step_4().  Result 1 = delete
+   the suffix; result 2 (only "ion") = delete it when 's' or 't' comes just before. */
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] = /* Final-letter table searched backwards by r_Step_5(): result 1 = 'e',
+   result 2 = 'l'. */
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] = /* Word list used by r_exception2(): a word that equals one of these
+   after Step_1a makes english_stem() skip Step_1b through Step_5.  Every result is -1 because only
+   the hit itself matters. */
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] = /* Whole-word exceptions searched forwards by r_exception1().  Results
+   1..11 pick the fixed replacement in that function's switch; -1 means the word is accepted
+   unchanged. */
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+static unsigned char g_v[] = { 17, 65, 16, 1 }; /* always passed with the range 97..121 ('a'..'y');
+   presumably a bitmap in which bit k stands for character 97+k, giving a e i o u y -- TODO confirm
+   against in_grouping() */
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; /* passed with the range 89..121 ('Y'..'y');
+   under the same assumed layout: Y a e i o u w x y -- TODO confirm */
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 }; /* passed with the range 99..116 ('c'..'t');
+   under the same assumed layout: c d e g h k m n r t -- TODO confirm */
+
+static symbol s_0[] = { 'y' }; /* s_0 .. s_48: literal strings handed, together with an explicit
+   length, to eq_s/eq_s_b, slice_from_s and insert_s by the routines below; the generator emits one
+   array per use, which is why several hold the same text */
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+static int r_prelude(struct SN_env * z) { /* Prelude: clears the Y_found flag (B[0]), then rewrites
+       'y' as 'Y' in two situations -- at the entry cursor when a g_v character follows it, and
+       anywhere it comes directly after a g_v character -- setting B[0] on every rewrite.  The entry
+       cursor is restored before returning; the result is always 1. */
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2; /* limit reached without another match: leave the repeat */
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1: /* no goto in this function targets this label */
+        z->c = c;
+    }
+    return 1;
+}
+
+static int r_mark_regions(struct SN_env * z) { /* Computes the two region marks.  I[0] (p1) and I[1]
+       (p2) both start at the limit l.  p1 becomes the position right after the a_0 prefix when it
+       matches, otherwise the position after the first g_v character followed by a non-g_v character;
+       p2 becomes the position after the next such pair.  If the input runs out first, the marks not
+       yet set stay at l.  The entry cursor is restored; the result is always 1. */
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+static int r_shortv(struct SN_env * z) { /* Backward test for a "short" ending just before the cursor.
+       Reading leftwards it accepts either: a character outside g_v_WXY, then a g_v character, then a
+       character outside g_v; or a character outside g_v, then a g_v character that sits at the
+       backward limit.  Returns 1 on a match and 0 otherwise; the cursor is left where matching
+       stopped, so callers save and restore it themselves. */
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+static int r_R1(struct SN_env * z) { /* R1 test: 1 when the cursor is at or past mark I[0] (p1), else 0. */
+    if (!(z->I[0] <= z->c)) return 0;
+    return 1;
+}
+
+static int r_R2(struct SN_env * z) { /* R2 test: 1 when the cursor is at or past mark I[1] (p2), else 0. */
+    if (!(z->I[1] <= z->c)) return 0;
+    return 1;
+}
+
+static int r_Step_1a(struct SN_env * z) { /* Step 1a (backward mode): looks up a suffix from a_1.
+       "sses" becomes "ss"; "ied"/"ies" becomes "ie" when exactly one character precedes it and "i"
+       otherwise; a bare "s" is deleted when a g_v character occurs somewhere before the character
+       that immediately precedes it.  "ss" and "us" (result -1) match but are left alone.  Returns 0
+       when no suffix matches or the "s" condition fails, else 1. */
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_1b(struct SN_env * z) { /* Step 1b (backward mode): looks up a suffix from a_3.
+       Result 1 ("eed", "eedly"): rewritten to "ee" if the suffix lies in R1.  Result 2 ("ed", "ing",
+       "edly", "ingly"): requires a g_v character somewhere before the suffix, deletes the suffix,
+       then consults a_2 on the remainder -- result 1 appends 'e', result 2 drops the last character,
+       result 3 appends 'e' only when the cursor is exactly at p1 and r_shortv() accepts.  Returns 0
+       whenever a lookup or condition fails, else 1. */
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_1c(struct SN_env * z) { /* Step 1c (backward mode): a final 'y' or 'Y' is rewritten
+       to 'i' when the character before it is outside g_v and that character is not the first one
+       (the cursor must not have reached the backward limit).  Returns 1 after the rewrite, 0 when
+       any condition fails. */
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+static int r_Step_2(struct SN_env * z) { /* Step 2 (backward mode): looks up a suffix from a_4 and,
+       only if it lies in R1, replaces it with the string picked by the table result (cases 1-15) or
+       deletes it (case 16).  Case 13 ("ogi") also needs an 'l' just before the suffix; case 16
+       ("li") needs a g_valid_LI character just before it.  Returns 0 when nothing applies, else 1. */
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_3(struct SN_env * z) { /* Step 3 (backward mode): looks up a suffix from a_5 and,
+       only if it lies in R1, rewrites it: "tional" -> "tion", "ational" -> "ate", "alize" -> "al",
+       "icate"/"iciti"/"ical" -> "ic"; "ful" and "ness" are deleted; "ative" is deleted only when it
+       also lies in R2.  Returns 0 when nothing applies, else 1. */
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_4(struct SN_env * z) { /* Step 4 (backward mode): looks up a suffix from a_6 and,
+       only if it lies in R2, deletes it.  "ion" (result 2) is deleted only when 's' or 't' comes
+       just before it.  Returns 0 when nothing applies, else 1. */
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_5(struct SN_env * z) { /* Step 5 (backward mode): handles a final 'e' or 'l' from
+       a_7.  'e' is deleted when it lies in R2, or when it lies in R1 and r_shortv() rejects what
+       precedes it.  'l' is deleted when it lies in R2 and another 'l' comes just before it.
+       Returns 0 when nothing applies, else 1. */
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+static int r_exception2(struct SN_env * z) { /* Backward-mode test: returns 1 when an a_8 entry
+       matches before the cursor and the match reaches the backward limit (the whole remaining word
+       is that entry), else 0.  Nothing is rewritten. */
+    z->ket = z->c; /* [, line 147 */
+    if (!(find_among_b(z, a_8, 8))) return 0; /* substring, line 147 */
+    z->bra = z->c; /* ], line 147 */
+    if (z->c > z->lb) return 0; /* atlimit, line 147 */
+    return 1;
+}
+
+static int r_exception1(struct SN_env * z) { /* Forward-mode whole-word exceptions: matches an a_9
+       entry at the cursor and requires the match to end at the limit l.  Results 1-11 replace the
+       word with a fixed stem (for example "skis" -> "ski", "dying" -> "die"); entries with result
+       -1 are accepted unchanged.  Returns 1 when the word was recognised, else 0. */
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+
+static int r_postlude(struct SN_env * z) { /* Postlude: when the Y_found flag (B[0]) is set, scans
+       forward from the cursor and rewrites each 'Y' it finds back to 'y'.  Returns 0 when the flag
+       is clear (nothing to undo), else 1. */
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+extern int english_stem(struct SN_env * z) { /* Entry point: stems the word held in z in place.
+       If r_exception1() recognises the whole word, that result is final.  Otherwise the word must
+       have at least 3 symbols after the cursor (else 0 is returned and nothing is changed); then
+       prelude and mark_regions run forwards, the cursor moves to the end for backward mode, Step_1a
+       runs, and unless r_exception2() matches, Step_1b .. Step_5 run in order; postlude finishes.
+       Each step is wrapped so that its failure is ignored and the cursor is restored.  Returns 1 in
+       every other case. */
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+extern struct SN_env * english_create_env(void) { /* no string variables, 2 integers (I[0], I[1]), 1 boolean (B[0]) */ return SN_create_env(0, 2, 1); }
+
+extern void english_close_env(struct SN_env * z) { /* hands z straight to the shared SN_close_env() */ SN_close_env(z); }
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+#define MAXINT INT_MAX /* largest int value; INT_MAX is supplied by <limits.h> */
+#define MININT INT_MIN /* smallest int value; INT_MIN is supplied by <limits.h> */
+
+#define HEAD 2*sizeof(int) /* byte size of two ints; NOTE(review): the expansion is not parenthesized, so use it only where '*' cannot re-associate (e.g. not as x / HEAD) */
+
+#define SIZE(p)        ((int *)(p))[-1] /* the int stored immediately before p */
+#define SET_SIZE(p, n) ((int *)(p))[-1] = n /* writes n into that same int; n is pasted unparenthesized */
+#define CAPACITY(p)    ((int *)(p))[-2] /* the int stored two ints before p */
+
+struct among /* One row of a table passed to find_among() / find_among_b(). */
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    int (* function)(struct SN_env *); /* routine attached to the row; every table in view passes 0 here -- presumably an optional extra match condition, TODO confirm */
+};
+
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point: stems the text held in z in place. */
+extern int russian_stem(struct SN_env * z);
+/* Internal routines, one per step of the stemmer; each returns 1 when it
+   applies and 0 when it fails. */
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Environment constructor and destructor for this stemmer. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Suffix strings for table a_0, searched backwards by r_perfective_gerund.
+   NOTE(review): the byte values look like KOI8-R Cyrillic codes -- confirm
+   against the encoding the callers feed in. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+/* Each entry is { s_size, s, substring_i, result, function }.  substring_i
+   is the index of a shorter entry to fall back to (-1 for none); result 1
+   needs a preceding 193 or 209 before deletion, result 2 deletes at once. */
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Suffix strings for table a_1, searched backwards by r_adjective. */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+/* Each entry is { s_size, s, substring_i, result, function }.  Every entry
+   has result 1, which r_adjective handles by deleting the matched suffix. */
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Suffix strings for table a_2, searched backwards by r_adjectival after
+   r_adjective has succeeded. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+/* Each entry is { s_size, s, substring_i, result, function }.  Result 1 is
+   deleted only when preceded by 193 or 209; result 2 is deleted directly. */
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* Suffix strings for table a_3, searched backwards by r_reflexive. */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+/* Each entry is { s_size, s, substring_i, result, function }; both entries
+   have result 1, which r_reflexive handles by deleting the suffix. */
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Suffix strings for table a_4, searched backwards by r_verb. */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+/* Each entry is { s_size, s, substring_i, result, function }.  In r_verb,
+   result 1 is deleted only when preceded by 193 or 209, while result 2 is
+   deleted unconditionally. */
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Suffix strings for table a_5, searched backwards by r_noun. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+/* Each entry is { s_size, s, substring_i, result, function }.  Every entry
+   has result 1, which r_noun handles by deleting the matched suffix. */
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* Suffix strings for table a_6, searched backwards by r_derivational. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+/* Each entry is { s_size, s, substring_i, result, function }; both have
+   result 1, deleted by r_derivational when the match lies at or after the
+   I[1] mark (checked through r_R2). */
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Suffix strings for table a_7, searched backwards by r_tidy_up. */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+/* Each entry is { s_size, s, substring_i, result, function }.  r_tidy_up
+   switches on the result: 1 deletes the match and then one symbol 206 of a
+   trailing pair, 2 deletes the matched 206 only when another 206 precedes
+   it, and 3 deletes the match outright. */
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Bitmap passed to in_grouping/out_grouping by r_mark_regions with min 192
+   and max 220: bit n of the array stands for symbol code 192 + n.  The set
+   bits are codes 192, 193, 197, 201, 207, 209, 213, 217 and 220
+   (presumably the Russian vowels -- TODO confirm). */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* One-symbol literals compared with eq_s_b in the routines below.  The
+   generator emits one array per use, which is why values repeat. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Set the two region marks z->I[0] ("pV") and z->I[1] ("p2").
+ * Both start at the end of the text (z->l).  Scanning forward from the
+ * cursor, I[0] becomes the position just after the first symbol in g_v;
+ * I[1] becomes the position reached after skipping, in turn, one more
+ * symbol outside g_v, one inside g_v and one outside g_v.  If the text
+ * ends during any scan, the marks not yet assigned keep the value z->l.
+ * The cursor is restored before returning; the result is always 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 100 */
+        /* advance until a symbol in g_v has been consumed */
+        while(1) { /* gopast, line 101 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+            break;
+        lab1:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[0] = z->c; /* setmark pV, line 101 */
+        /* advance until a symbol outside g_v has been consumed */
+        while(1) { /* gopast, line 101 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+            break;
+        lab2:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        /* next symbol in g_v */
+        while(1) { /* gopast, line 102 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+            break;
+        lab3:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        /* next symbol outside g_v */
+        while(1) { /* gopast, line 102 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+            break;
+        lab4:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 102 */
+    lab0:
+        /* put the cursor back where it was on entry */
+        z->c = c;
+    }
+    return 1;
+}
+
+/* Region test: yields 1 when the cursor is at or beyond the mark stored in
+   z->I[1], and 0 when it lies before that mark. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/* Remove a suffix listed in table a_0 from the text before the cursor.
+ * Returns 0 when no entry matches, or when a result-1 entry is not
+ * preceded by symbol 193 (s_0) or 209 (s_1); returns 1 after deleting.
+ * Only the span bra..ket (the table match) is deleted: the preceding
+ * symbol is tested with eq_s_b, which moves the cursor but not z->bra.
+ */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 111 */
+    among_var = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 111 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            /* m remembers the cursor so the second alternative starts
+               from the same place as the first */
+            {   int m = z->l - z->c; /* or, line 115 */
+                if (!(eq_s_b(z, 1, s_0))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_1))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 115 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 122 */
+            break;
+    }
+    return 1;
+}
+
+/* Strip a suffix from table a_1 that ends at the cursor.
+ * Returns 0 when nothing in the table matches, 1 otherwise.  The slice
+ * bra..ket is set to the matched suffix before it is deleted.
+ */
+static int r_adjective(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c; /* [, line 127 */
+    hit = find_among_b(z, a_1, 26); /* substring, line 127 */
+    if (hit == 0) return 0;
+    z->bra = z->c; /* ], line 127 */
+
+    /* every entry of a_1 carries result 1 */
+    if (hit == 1) slice_del(z); /* delete, line 136 */
+    return 1;
+}
+
+/* Strip an adjective ending and then, optionally, a suffix from table a_2.
+ * Returns 0 when r_adjective fails and 1 otherwise; the a_2 step is a
+ * "try", so its failure is not a failure of this routine.
+ *
+ * The mark of the try is kept in m_keep.  The generated code named it m,
+ * the same name as the mark of the inner "or", so the failure path of the
+ * second alternative restored the cursor to the inner mark (after the a_2
+ * match) rather than to the start of the try.
+ */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m_keep = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m_keep; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m_keep; goto lab0; }
+            case 1:
+                /* result 1 is deleted only after symbol 193 or 209 */
+                {   int m = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m;
+                    /* both alternatives failed: abandon the whole try */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_keep; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Strip a suffix from table a_3 that ends at the cursor.
+ * Returns 0 when neither entry matches, 1 otherwise.
+ */
+static int r_reflexive(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c; /* [, line 168 */
+    hit = find_among_b(z, a_3, 2); /* substring, line 168 */
+    if (hit == 0) return 0;
+    z->bra = z->c; /* ], line 168 */
+
+    /* both entries of a_3 carry result 1 */
+    if (hit == 1) slice_del(z); /* delete, line 171 */
+    return 1;
+}
+
+/* Remove a suffix listed in table a_4 from the text before the cursor.
+ * Returns 0 when no entry matches, or when a result-1 entry is not
+ * preceded by symbol 193 (s_4) or 209 (s_5); returns 1 after deleting.
+ * Result-2 entries are deleted without looking at the preceding symbol.
+ */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 176 */
+    among_var = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 176 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            /* m remembers the cursor so the second alternative starts
+               from the same place as the first */
+            {   int m = z->l - z->c; /* or, line 182 */
+                if (!(eq_s_b(z, 1, s_4))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_5))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 182 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 190 */
+            break;
+    }
+    return 1;
+}
+
+/* Strip a suffix from table a_5 that ends at the cursor.
+ * Returns 0 when nothing in the table matches, 1 otherwise.
+ */
+static int r_noun(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c; /* [, line 199 */
+    hit = find_among_b(z, a_5, 36); /* substring, line 199 */
+    if (hit == 0) return 0;
+    z->bra = z->c; /* ], line 199 */
+
+    /* every entry of a_5 carries result 1 */
+    if (hit == 1) slice_del(z); /* delete, line 206 */
+    return 1;
+}
+
+/* Strip a suffix from table a_6, provided the match starts at or after the
+ * I[1] mark (tested by r_R2 with the cursor at the start of the match).
+ * Returns 0 when no entry matches or the region test fails, 1 otherwise.
+ */
+static int r_derivational(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c; /* [, line 215 */
+    hit = find_among_b(z, a_6, 2); /* substring, line 215 */
+    if (hit == 0) return 0;
+    z->bra = z->c; /* ], line 215 */
+    if (r_R2(z) == 0) return 0; /* call R2, line 215 */
+
+    /* both entries of a_6 carry result 1 */
+    if (hit == 1) slice_del(z); /* delete, line 218 */
+    return 1;
+}
+
+/* Final clean-up driven by table a_7.
+ * Returns 0 when no entry matches or a required symbol 206 is missing,
+ * 1 otherwise.  Note that case 1 deletes the table match before running
+ * its remaining tests, so that deletion stands even when 0 is returned.
+ */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 223 */
+    among_var = find_among_b(z, a_7, 4); /* substring, line 223 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 223 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 227 */
+            /* then require two symbols 206 and delete the one nearer
+               the end (the slice covers only the first one tested) */
+            z->ket = z->c; /* [, line 228 */
+            if (!(eq_s_b(z, 1, s_6))) return 0;
+            z->bra = z->c; /* ], line 228 */
+            if (!(eq_s_b(z, 1, s_7))) return 0;
+            slice_del(z); /* delete, line 228 */
+            break;
+        case 2:
+            /* the match is a single 206; delete it only when another
+               206 precedes it */
+            if (!(eq_s_b(z, 1, s_8))) return 0;
+            slice_del(z); /* delete, line 231 */
+            break;
+        case 3:
+            slice_del(z); /* delete, line 233 */
+            break;
+    }
+    return 1;
+}
+
+/* Stem the text held in z, in place.
+ *
+ * Steps: compute the region marks; switch to backward processing with the
+ * backward limit z->lb raised to the I[0] mark; try perfective_gerund, or
+ * else an optional reflexive removal followed by the first of adjectival,
+ * verb or noun that succeeds; optionally delete a final symbol 201; run
+ * derivational and tidy_up; finally restore the limit.
+ *
+ * Returns 1 on completion.  Returns 0 only if the cursor is before I[0]
+ * once it has been moved to the end of the text.
+ */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    /* from here on the text is scanned from its end towards z->lb */
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        /* m3 keeps the old backward limit; the new one is the I[0] mark */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        /* each nested block below declares its own m for its own mark */
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                /* first success among adjectival, verb, noun wins */
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            /* end of the "do": the cursor returns to its mark whether
+               or not anything was removed */
+            z->c = z->l - m;
+        }
+        /* optional removal of a trailing symbol 201 (s_9) */
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        /* restore the backward limit saved above */
+        z->lb = m3;
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Build a fresh environment for the Russian stemmer via SN_create_env.
+   NOTE(review): the middle argument 2 presumably sizes the integer array
+   z->I, of which this file uses I[0] and I[1] -- confirm in api.c. */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Dispose of an environment obtained from russian_create_env. */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create and destroy the environment used by the Russian stemmer. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Stem the text currently held in z, in place. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* unless(C) reads as "if C is false"; used by the out_* tests and by
+   replace_s in this file. */
+#define unless(C) if(!(C))
+
+/* Capacity and initial size, in symbols, given to a string by create_s. */
+#define CREATE_SIZE 1
+
+/* Allocate a new symbol string.
+ * The returned pointer addresses the symbols themselves; HEAD bytes of
+ * bookkeeping sit in front of it and are reached through CAPACITY() and
+ * SIZE().  Capacity and size both start at CREATE_SIZE.  Release the
+ * string with lose_s.
+ *
+ * The allocation is checked before HEAD is added to it: an unchecked NULL
+ * would otherwise become a non-null pointer and be written through.  On
+ * failure the process is stopped with a message, as slice_check in this
+ * file does for its own fatal error.
+ */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == 0)
+    {
+        fprintf(stderr, "out of memory in create_s\n");
+        exit(1);
+    }
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Release a symbol string obtained from create_s or increase_size.
+ * p addresses the symbols; the allocation itself begins HEAD bytes
+ * earlier.  A null p is ignored, as free() would do: without this guard
+ * the pointer arithmetic would hand free() an invalid address.
+ */
+extern void lose_s(symbol * p)
+{   if (p == 0) return;
+    free((char *) p - HEAD);
+}
+
+/* Forward test: is the symbol at the cursor a member of grouping s?
+ * s is a bitmap in which bit (code - min) represents symbol code `code`.
+ * Returns 1 and advances the cursor when the symbol lies in min..max and
+ * its bit is set; returns 0 and leaves the cursor alone otherwise,
+ * including when the cursor is already at the limit z->l.
+ */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int code;
+    int bit;
+
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code > max || code < min) return 0;
+    bit = code - min;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: is the symbol just before the cursor a member of
+ * grouping s?  s is a bitmap in which bit (code - min) represents symbol
+ * code `code`.  Returns 1 and moves the cursor back one symbol on
+ * success; returns 0 and leaves the cursor alone otherwise, including
+ * when the cursor is already at the backward limit z->lb.
+ */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int code;
+    int bit;
+
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code > max || code < min) return 0;
+    bit = code - min;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: is the symbol at the cursor NOT a member of grouping s?
+ * A symbol outside min..max, or inside it with its bit clear in s, counts
+ * as outside the grouping.  Returns 1 and advances the cursor in that
+ * case; returns 0 and leaves the cursor alone when the symbol is in the
+ * grouping or the cursor is already at the limit z->l.
+ */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int code;
+
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code <= max && code >= min)
+    {   int bit = code - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward test: is the symbol just before the cursor NOT a member of
+ * grouping s?  A symbol outside min..max, or inside it with its bit clear
+ * in s, counts as outside the grouping.  Returns 1 and moves the cursor
+ * back one symbol in that case; returns 0 and leaves the cursor alone
+ * when the symbol is in the grouping or the cursor is at z->lb.
+ */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int code;
+
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code <= max && code >= min)
+    {   int bit = code - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward test: does the symbol at the cursor lie in min..max inclusive?
+ * Returns 1 and advances the cursor if so; returns 0 and leaves the
+ * cursor alone otherwise, including at the limit z->l.
+ */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int code;
+
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code < min || code > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: does the symbol just before the cursor lie in min..max
+ * inclusive?  Returns 1 and moves the cursor back one symbol if so;
+ * returns 0 and leaves the cursor alone otherwise, including at z->lb.
+ */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int code;
+
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code < min || code > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: does the symbol at the cursor lie outside min..max?
+ * Returns 1 and advances the cursor if so; returns 0 and leaves the
+ * cursor alone when the symbol is inside the range or the cursor is at
+ * the limit z->l.
+ */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int code;
+
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code <= max && code >= min) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: does the symbol just before the cursor lie outside
+ * min..max?  Returns 1 and moves the cursor back one symbol if so;
+ * returns 0 and leaves the cursor alone when the symbol is inside the
+ * range or the cursor is at the backward limit z->lb.
+ */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int code;
+
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code <= max && code >= min) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward literal match: do the s_size symbols at s occur at the cursor?
+ * Returns 1 and moves the cursor past them if so; returns 0 and leaves
+ * the cursor alone when fewer than s_size symbols remain before z->l or
+ * the symbols differ.
+ */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->l - z->c < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward literal match: do the s_size symbols at s end at the cursor?
+ * Returns 1 and moves the cursor back to their start if so; returns 0
+ * and leaves the cursor alone when fewer than s_size symbols lie between
+ * z->lb and the cursor or the symbols differ.
+ */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->c - z->lb < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward literal match against a symbol string p whose length is taken
+   from SIZE(p); same result and cursor movement as eq_s. */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s(z, len, p);
+}
+
+/* Backward literal match against a symbol string p whose length is taken
+   from SIZE(p); same result and cursor movement as eq_s_b. */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s_b(z, len, p);
+}
+
+/* Look up the text starting at the cursor in the table v of v_size entries.
+ *
+ * The first loop is a binary search over v, comparing symbol by symbol;
+ * common_i and common_j record how many leading symbols are already known
+ * to agree with the entries at the lower and upper bounds, so comparisons
+ * resume from the smaller of the two.  (Binary search presumably relies
+ * on v being sorted by its strings -- the tables are generated; confirm
+ * with the Snowball compiler.)
+ *
+ * The second loop starts at the candidate i and follows substring_i links
+ * to shorter entries until one is matched in full (common_i >= s_size).
+ * For that entry the cursor is set just past the match; if the entry has
+ * a function it must also return non-zero.  The entry's result is then
+ * returned.  Returns 0 when the chain ends (substring_i < 0) without an
+ * accepted entry.
+ */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        /* this inner i is a separate variable from the search bound i */
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the function may have moved the cursor; put it back */
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/* Backward counterpart of find_among: look up the text ending at the
+ * cursor in the table v of v_size entries.
+ *
+ * Entries are compared from their last symbol towards their first against
+ * the text read backwards from the cursor, stopping at the backward limit
+ * z->lb.  The first loop is a binary search that narrows [i, j) using the
+ * counts common_i/common_j of symbols already known to agree.  The second
+ * loop walks the substring_i chain from candidate i to the first entry
+ * matched in full whose function (if any) returns non-zero, sets the
+ * cursor to the start of that match and returns the entry's result.
+ * Returns 0 when the chain ends without an accepted entry.
+ */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        /* this inner i is a separate variable from the search bound i */
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            /* one extra round so that entry 0 is compared as well */
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the function may have moved the cursor; put it back */
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Move the symbol string p into a larger allocation.
+ * The new capacity is n + 20 symbols.  CAPACITY(p) symbols are copied
+ * from the old string, the old string is released, and the new one is
+ * returned.  SIZE of the result is not set here: every caller in this
+ * file sets it straight afterwards.  Those callers only ask for growth
+ * (n greater than CAPACITY(p)), which the copy relies on.
+ *
+ * The allocation is checked before HEAD is added to it: an unchecked NULL
+ * would otherwise become a non-null pointer and be written through.  On
+ * failure the process is stopped with a message, as slice_check in this
+ * file does for its own fatal error.
+ */
+extern symbol * increase_size(symbol * p, int n)
+{   int new_size = n + 20;
+    symbol * q;
+    void * mem = malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    if (mem == 0)
+    {
+        fprintf(stderr, "out of memory in increase_size\n");
+        exit(1);
+    }
+    q = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(q) = new_size;
+    memmove(q, p, CAPACITY(p) * sizeof(symbol)); lose_s(p); return q;
+}
+
+/* Replace the symbols between c_bra and c_ket in z->p by the s_size
+ * symbols at s.
+ *
+ * Returns the change in length (s_size minus the length replaced).  When
+ * that is non-zero the buffer is grown if needed, the tail from c_ket on
+ * is shifted, and SIZE(z->p) and z->l are adjusted.  The cursor z->c is
+ * shifted with the tail if it was at or after c_ket, and pulled back to
+ * c_bra if it was strictly inside the replaced span.  z->bra and z->ket
+ * are not touched here.  s may be a null pointer when s_size is 0.
+ */
+
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{   int adjustment = s_size - (c_ket - c_bra);
+    int len = SIZE(z->p);
+    if (adjustment != 0)
+    {   if (adjustment + len > CAPACITY(z->p)) z->p = increase_size(z->p, adjustment + len);
+        /* open or close the gap by moving everything from c_ket onwards */
+        memmove(z->p + c_ket + adjustment, z->p + c_ket, (len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, adjustment + len);
+        z->l += adjustment;
+        if (z->c >= c_ket) z->c += adjustment; else
+            if (z->c > c_bra) z->c = c_bra;
+    }
+    /* nothing to copy for a pure deletion */
+    unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return adjustment;
+}
+
+/* Verify that the slice marks are ordered 0 <= bra <= ket <= l and that l
+ * does not exceed SIZE(z->p).  On violation, report on stderr, dump the
+ * buffer with debug() and terminate the process with status 1.
+ */
+static void slice_check(struct SN_env * z)
+{
+    int sane = 0 <= z->bra
+            && z->bra <= z->ket
+            && z->ket <= z->l
+            && z->l <= SIZE(z->p);   /* this last test could be removed */
+
+    if (sane) return;
+
+    fprintf(stderr, "faulty slice operation:\n");
+    debug(z, -1, 0);
+    exit(1);
+}
+
+/* Replace the current slice z->bra..z->ket with the s_size symbols at s,
+   after checking the slice marks with slice_check. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice with the symbol string p, whose length is
+   taken from SIZE(p). */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    slice_from_s(z, len, p);
+}
+
+/* Delete the current slice: replace it with zero symbols (a null source
+   pointer is passed, which replace_s never reads when the size is 0). */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, 0);
+}
+
+/* Replace z->p[bra..ket) with the s_size symbols at s and keep the slice
+ * marks in step: z->bra and z->ket are each shifted by the change in
+ * length when they lie at or after bra.
+ */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* Replace z->p[bra..ket) with the symbol string p (length SIZE(p)) and
+ * keep the slice marks in step: z->bra and z->ket are each shifted by the
+ * change in length when they lie at or after bra.
+ */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    int delta = replace_s(z, bra, ket, SIZE(p), p);
+
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* Copy the current slice z->bra..z->ket into the symbol string p.
+ * p is enlarged through increase_size when its capacity is too small, so
+ * the caller must continue with the returned pointer.  SIZE of the result
+ * is the slice length.  The slice marks are checked first.
+ */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int count;
+
+    slice_check(z);
+    count = z->ket - z->bra;
+    if (CAPACITY(p) < count) p = increase_size(p, count);
+    memmove(p, z->p + z->bra, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/* Copy the text z->p[0..z->l) into the symbol string p.
+ * p is enlarged through increase_size when its capacity is too small, so
+ * the caller must continue with the returned pointer.  SIZE of the result
+ * is z->l.
+ */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int count = z->l;
+
+    if (CAPACITY(p) < count) p = increase_size(p, count);
+    memmove(p, z->p, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/* Print the buffer z->p on stdout with position markers, for diagnosis.
+ * Markers are written before the symbol at the matching index:
+ * '{' for lb, '[' for bra, '|' for the cursor c, ']' for ket, '}' for l.
+ * Zero symbols are shown as '#'.  When number is non-negative the line
+ * is prefixed with number, line_count and the buffer size; slice_check
+ * passes -1, so in that case only the buffer and the closing quote are
+ * printed.
+ */
+extern void debug(struct SN_env * z, int number, int line_count)
+{   int i;
+    int limit = SIZE(z->p);
+    /* earlier form of the prefix, without the buffer size: */
+    /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+    /* i runs to limit inclusive so markers at the very end are shown */
+    for (i = 0; i <= limit; i++)
+    {   if (z->lb == i) printf("{");
+        if (z->bra == i) printf("[");
+        if (z->c == i) printf("|");
+        if (z->ket == i) printf("]");
+        if (z->l == i) printf("}");
+        if (i < limit)
+        {   int ch = z->p[i];
+            if (ch == 0) ch = '#';
+            printf("%c", ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case the NUL-terminated string str in place with tolower() and
+ * return str itself.  Bytes are handled as unsigned char so that values
+ * above 127 are passed to tolower() legally.
+ */
+char*
+lowerstr(char *str) {
+   unsigned char *cur;
+
+   for(cur=(unsigned char*)str; *cur; cur++)
+       *cur = (unsigned char)tolower(*cur);
+   return str;
+}
+
+/*
+ * Release a stop list: free each of the s->len words, then the pointer
+ * array, and finally zero the whole StopList structure (this clears
+ * s->wordop as well, exactly as before).
+ *
+ * The array is freed once, after the loop; freeing it inside the loop made
+ * every later iteration read and free already-released memory.
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       /* the counter alone bounds the walk, so no slot past the end is read */
+       while( s->len >0 ) {
+           free(*ptr);     /* free(NULL) is a no-op, empty slots are harmless */
+           ptr++; s->len--;
+       }
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Error exit for readstoplist(): free the nwords strings collected so far
+ * and the array holding them, close the file, zero *s (as freestoplist()
+ * would) and raise the out-of-memory error.
+ */
+static void
+readstoplist_nomem(StopList *s, char **stop, int nwords, FILE *hin) {
+   while( nwords > 0 )
+       free(stop[--nwords]);
+   free(stop);
+   memset(s,0,sizeof(StopList));
+   fclose(hin);
+   elog(ERROR,"Not enough memory");
+}
+
+/*
+ * Load a stop-word file into *s.
+ *
+ * in   - text value holding the file name; NULL or empty yields an empty list
+ * s    - list to fill; s->wordop, when set, is applied to every word read
+ *
+ * One word is read per line; empty lines are skipped.  On return s->stop
+ * points to a malloc'ed array of s->len malloc'ed strings (NULL when nothing
+ * was read).  The list is not sorted here.
+ * Errors are raised with elog(ERROR) when the file cannot be opened or
+ * memory runs out.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t buflen=strlen(buf);
+
+           /*
+            * Drop the line terminator only when there is one: the last line
+            * of a file (or an over-long line) may come without '\n', and
+            * must not lose its final character.
+            */
+           if ( buflen>0 && buf[buflen-1]=='\n' )
+               buf[buflen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp)   /* realloc failed: the old array is still ours to free */
+                   readstoplist_nomem(s, stop, s->len, hin);
+               stop=tmp;
+           }
+    
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] )
+               readstoplist_nomem(s, stop, s->len, hin);
+           if ( s->wordop ) 
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++; 
+       }
+       fclose(hin);
+       pfree(filename); 
+   }
+   s->stop=stop;
+} 
+
+/*
+ * qsort()/bsearch() comparator for arrays of char*: a and b point at the
+ * array elements, the strings themselves are compared with strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   const char *left = *(char* const*)a;
+   const char *right = *(char* const*)b;
+
+   return strcmp( left, right );
+}
+
+/*
+ * Sort the words of the stop list with comparestr() so that
+ * searchstoplist() can use bsearch().  Does nothing for an empty list.
+ */
+void
+sortstoplist(StopList *s) {
+   if ( s->stop==NULL || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is present in the stop list.  If s->wordop is set it
+ * is applied to key first (it is called even when the list is empty).  The
+ * lookup is a binary search, so the list must already be ordered by
+ * comparestr().
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop ) 
+       key=(*(s->wordop))(key);
+
+   if ( !s->stop || s->len<=0 )
+       return false;
+
+   if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
+       return true;
+   return false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <locale.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the configuration whose oid is id.
+ *
+ * Steps: read the parser name from pg_ts_cfg; read (token id, dictionary
+ * name array) pairs from pg_ts_cfgmap, ordered by token id descending, and
+ * build cfg->map indexed by token id; finally resolve the parser name and
+ * every dictionary name to ids with name2id_prs() / name2id_dict().
+ *
+ * cfg->map and the dict_id arrays are malloc'ed; names are temporarily
+ * copied into TopMemoryContext and freed once resolved.
+ * Any failure is reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       /* without a parser name there is nothing to copy or resolve */
+       if ( isnull )
+           ts_error(ERROR, "Null parser name for tsearch cfg with id %d", id);
+       /* the copy must outlive SPI_finish(), hence TopMemoryContext */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       /* isnull now describes column 2, the dictionary-name array */
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /*
+        * Rows arrive with the largest token id first, so the first row
+        * fixes the size of the map.
+        */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       /* check before memset(): a failed malloc used to be dereferenced */
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* keep the names past SPI_finish(); they are resolved to ids below */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       /* free the detoasted copy only if detoasting really made one */
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* swap every stored dictionary name for the dictionary's id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Backend-local cache of loaded configurations.
+ */
+typedef struct {
+   TSCfgInfo   *last_cfg;      /* entry returned by the latest findcfg(), or NULL */
+   int     len;                /* number of entries of list in use */
+   int     reallen;            /* number of entries list has room for */
+   TSCfgInfo   *list;          /* realloc'ed array, kept sorted with comparecfg() */
+   SNMap       name2id_map;    /* config name -> oid cache used by name2id_cfg() */
+} CFGList;
+
+/* the single cache instance; starts out empty */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Throw away the whole configuration cache: the name -> id map, every
+ * cached TSCfgInfo (its map array and each dict_id array inside it), and
+ * the list itself.  CList is left zeroed, i.e. empty.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly: Oid is unsigned, so returning the
+ * difference of two ids as an int yields the wrong sign once they are more
+ * than 2^31 apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached TSCfgInfo for configuration id, loading it with
+ * init_cfg() on first use.  The returned pointer refers into CList.list and
+ * is only valid until the cache grows or is reset.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo key;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   key.id=id;
+   if ( CList.len != 0 ) {
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /*
+    * last chance: load it.  last_cfg stays NULL until the new entry is
+    * complete; realloc() may move the list and init_cfg() may bail out
+    * with an error after it has already stored the id, and in neither
+    * case may a later call find a stale or half-built "last used" entry.
+    */
+   CList.last_cfg = NULL;
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+   init_cfg(id, &(CList.list[CList.len]));
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+
+   /* qsort changed order, so look the new entry up again */
+   CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return CList.last_cfg;
+}
+
+
+/*
+ * Translate a configuration name into its oid.  The name -> oid cache in
+ * CList.name2id_map is consulted first; on a miss pg_ts_cfg is queried
+ * through SPI and the answer is added to the cache.  Raises an error when
+ * the query fails, finds no row, or yields a NULL oid.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid cfgid;
+
+   /* fast path: this name was resolved before */
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid )
+       return cfgid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !( SPI_processed > 0 ) )
+       elog(ERROR, "No tsearch config");
+
+   cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull )
+       elog(ERROR, "Null id for tsearch config");
+
+   SPI_finish();
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Split buf (buflen bytes) into tokens with the parser of cfg, normalize
+ * each token with the dictionaries mapped to its type, and append the
+ * resulting words to prs.
+ *
+ * For every token the dictionaries are tried in order; the first one that
+ * returns a non-NULL result ends the search for that token.  Each such
+ * token advances prs->pos by one, and all words it produced share that
+ * position.  Tokens whose type has no map entry are skipped.
+ * Raises an error for a token of MAXSTRLEN bytes or more.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser hands back the token through lemm/lenlemm; type 0 ends input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an integer, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               /* the string itself is kept by prs; only the array is freed below */
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token to the headline word array: grow the array by doubling
+ * until there is room, zero the new slot, record the token type and length
+ * and store a palloc'ed copy of its buflen bytes (not NUL-terminated).
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD *slot;
+
+   while (prs->lenwords <= prs->curwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) ); 
+   slot->type = (uint8)type;
+   slot->len = buflen; 
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;    
+}
+
+/*
+ * Link the most recently added headline word to the query items whose
+ * operand equals the normalized form buf (buflen bytes).
+ *
+ * The first matching item is stored in the word itself.  For every further
+ * match a copy of the word is appended, pointing at that item and flagged
+ * as "repeated".
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* make room for the worst case: one extra copy per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc() above may have moved the
+    * array, which would leave an earlier pointer dangling.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Headline variant of parsetext_v2(): every token of buf is appended to prs
+ * verbatim with hladdword(), and each normalized form a dictionary returns
+ * for it is matched against the query with hlfinditem().
+ *
+ * As in parsetext_v2(), the first dictionary that returns a non-NULL result
+ * ends the search for a token; tokens whose type has no map entry are kept
+ * but not matched.  The normalized strings are freed here.
+ * Raises an error for a token of MAXSTRLEN bytes or more.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser hands back the token through lemm/lenlemm; type 0 ends input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an integer, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Assemble the headline text from the words collected in prs.
+ *
+ * A word is written only when it is flagged "in" and neither "skip" nor
+ * "repeated".  A "replace" word is written as a single blank; any other
+ * word is copied as is, wrapped in startsel/stopsel when "selected".
+ * The text of every non-repeated word is freed along the way.
+ * Returns a palloc'ed text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int  cap=128;
+   text *out=(text*)palloc( cap );
+   char *cur=((char*)out) + VARHDRSZ;
+   int  idx;
+
+   for(idx=0; idx < prs->curwords; idx++) {
+       HLWORD  *w=&(prs->words[idx]);
+
+       /* reserve room for the word plus both markers before writing */
+       while ( w->len + prs->stopsellen + prs->startsellen + (cur - ((char*)out)) >= cap ) {
+           int used = cur - ((char*)out);
+           cap*= 2;
+           out = (text *) repalloc(out, cap);
+           cur=((char*)out) + used;
+       }
+
+       if ( w->in && !w->skip && !w->repeated ) {
+           if ( w->replace ) {
+               *cur=' ';
+               cur++;
+           } else {
+               if (w->selected) {
+                   memcpy(cur,prs->startsel,prs->startsellen);
+                   cur+=prs->startsellen;
+               }
+               memcpy(cur,w->word,w->len);
+               cur+=w->len;
+               if (w->selected) {
+                   memcpy(cur,prs->stopsel,prs->stopsellen);
+                   cur+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries share their text with the entry they were copied from */
+       if ( !w->repeated )
+           pfree(w->word);
+   }
+
+   VARATT_SIZEP(out)=cur - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the id of the current configuration.  When none has been chosen
+ * yet, the configuration whose pg_ts_cfg.locale equals the current LC_CTYPE
+ * locale is looked up through SPI and remembered in current_cfg_id.
+ * Raises an error when no configuration matches the locale.
+ */
+int  
+get_currcfg(void) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1];
+   const char *locname;
+   bool isnull;
+   int rc;
+
+   /* already chosen, either by set_curcfg() or by an earlier lookup */
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( plan_getcfg_bylocale == NULL ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( plan_getcfg_bylocale == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* setlocale() with a NULL locale only queries the current setting */
+   locname = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)locname) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !( SPI_processed > 0 ) )
+       elog(ERROR,"Can't find tsearch config by locale");
+   current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * set_curcfg(oid) - make the configuration with the given oid the current
+ * one.  The configuration is loaded through findcfg() first, so the id is
+ * only stored when that succeeded.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * set_curcfg(text) - same as set_curcfg(oid), with the configuration given
+ * by name; the name is resolved with name2id_cfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Oid cfgid=name2id_cfg(name);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum( cfgid ) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/*
+ * show_curcfg() - SQL-callable wrapper returning get_currcfg() as an oid.
+ */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   PG_RETURN_OID( get_currcfg() ); 
+}
+
+/*
+ * reset_tsearch() - SQL-callable; reports "TSearch cache cleaned" at NOTICE
+ * level through ts_error() and returns void.
+ * NOTE(review): no cache is touched directly here - presumably ts_error()
+ * itself drops the cached state before reporting; confirm in its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+/*
+ * Declarations for tsearch configurations: the cached per-configuration
+ * info, and the word buffers filled while parsing text for tsvector
+ * construction and for headline generation.
+ */
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* the dictionaries to try, in order, for one token type */
+typedef struct {
+   int len;
+   Datum   *dict_id;
+} ListDictionary;
+
+/*
+ * One loaded configuration: id is its oid in pg_ts_cfg, prs_id the id of
+ * its parser, and map an array of len entries indexed by token type.
+ */
+typedef struct {
+   Oid id;
+   Oid prs_id;
+   int len;
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/*
+ * One normalized word produced by parsetext_v2(): word/len hold the string
+ * and its length, pos.pos the (limited) position of the token it came from.
+ * NOTE(review): pos.apos and alen are only initialised (alen = 0) in the
+ * code that fills this structure; presumably they carry a position array
+ * built later - confirm against the users of WORD.
+ */
+typedef struct {
+        uint16          len;
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        char       *word;
+   uint32  alen;
+}       WORD;
+   
+/*
+ * Growable array of WORDs: lenwords slots allocated, curwords in use; pos
+ * is the running token position counter.
+ */
+typedef struct {
+        WORD       *words;
+        int4            lenwords;
+        int4            curwords;
+   int4        pos;
+}       PRSTEXT;
+
+/*
+ * One token of a text being turned into a headline.  genhl() prints a word
+ * only when "in" is set and "skip" and "repeated" are clear; "replace"
+ * makes it print a single blank instead, "selected" wraps the word in the
+ * start/stop markers.  item points at the query item the token matched
+ * (NULL if none); "repeated" marks the extra copies made when a token
+ * matches several query items.
+ */
+typedef struct {
+        uint16    len;
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   uint8   type;
+        char      *word;
+   ITEM      *item;
+}       HLWORD;
+   
+/*
+ * Growable array of HLWORDs (lenwords allocated, curwords in use) plus the
+ * marker strings, and their lengths, written around selected words.
+ */
+typedef struct {
+        HLWORD       *words;
+        int4            lenwords;
+        int4            curwords;
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+
+/*
+ * Input function for tsstat.  The argument is never examined: the result is
+ * always a freshly palloc'ed, header-only tsstat holding zero entries.
+ */
+Datum           
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty = (tsstat *) palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+/*
+ * Output function for tsstat: there is no text representation, so every
+ * call raises the error "Unimplemented".
+ */
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   /* only reached if elog(ERROR) returns */
+   PG_RETURN_NULL();
+}
+
+/*
+ * Create or grow an array of WordEntry pointers.
+ *
+ * in:  current array, or NULL if none has been allocated yet
+ * len: in/out capacity in elements; updated to the new capacity
+ *
+ * A NULL array or a zero capacity yields a fresh 8-slot array; otherwise the
+ * capacity is doubled and the array is repalloc'ed.  Returns the (possibly
+ * moved) array.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*)* (*len) );
+   }
+
+   *len=8;
+   return palloc( sizeof(WordEntry*)* (*len) );
+}
+
+/*
+ * Order a statistics entry (a, stored in stat) against a tsvector word
+ * (b, stored in txt): shorter words sort first, words of equal length are
+ * ordered by strncmp() over that length.  Returns <0, 0 or >0.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   char    *statword;
+   char    *txtword;
+
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   statword = STATSTRPTR(stat) + a->pos;
+   txtword = STRPTR(txt) + b->pos;
+   return strncmp(statword, txtword, a->len);
+}
+
+/*
+ * Build a new tsstat that holds every entry of "stat" plus one fresh entry
+ * for each of the "len" words referenced by "entry" (pointers into the word
+ * array of tsvector "txt").
+ *
+ * A fresh entry gets ndoc=1 and nentry=number of positions of the word
+ * (1 if the word carries no positions).  The old string area is copied to
+ * the start of the new string area, so the pos offsets of copied entries
+ * stay valid; the new words are appended behind it.
+ *
+ * Both the entries of "stat" and the words in "entry" are assumed to be
+ * ordered by compareStatWord(), and no new word may already be present in
+ * "stat" (ts_accum() only passes words it did not find).
+ *
+ * Returns the palloc'ed result; "stat" itself is not freed.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* bytes of string data contributed by the new words */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings keep their offsets, new ones are appended at curptr */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary-search its insertion point */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       /* entries before the insertion point, the new entry, then the rest */
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries (possibly nothing) ... */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* ... or whatever is left of the new words; at most one of the two is non-empty */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+
+/*
+ * Accumulate the words of one tsvector (argument 1) into a tsstat
+ * (argument 0) and return the resulting tsstat.
+ *
+ * Words already present in the statistics are updated in place (ndoc is
+ * incremented, nentry grows by the word's number of positions, or by 1 if
+ * it has none).  Words not yet present are collected and handed to
+ * formstat(), which builds a new, larger tsstat; in that case the old one
+ * is NOT freed here, the caller decides.
+ *
+ * A NULL/missing statistics argument starts from an empty tsstat.  A NULL
+ * or empty tsvector returns the statistics unchanged; the NULL test is done
+ * before detoasting so that a NULL datum is never dereferenced.
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt;
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* nothing to add for a NULL tsvector; must be checked before detoasting */
+   if ( PG_ARGISNULL(1) || PG_GETARG_POINTER(1)==NULL )
+       PG_RETURN_POINTER(stat);
+
+   txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+
+   /* simple check of correctness */
+   if ( txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both ordered lists in parallel, or, when the
+    * statistics are at least 100 times larger than the tsvector,
+    * binary-search each word.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word sorts before the current entry: it is new */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* words sorting after the last known entry are all new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* on a hit the loop was left by break, so StopLow < StopHigh still holds */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions below: a private copy of
+ * the statistics and the index of the next entry to return.
+ * The copy is a tsstat, so it is declared as one (it was mistakenly typed
+ * as tsvector*, which only worked because both start with the same two
+ * int4 header fields).
+ */
+typedef struct {
+   uint32  cur;
+   tsstat *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions: copies "stat"
+ * into the multi-call memory context, stores it (with a zeroed cursor) in
+ * funcctx->user_fctx and prepares the slot and attribute metadata for the
+ * relation/type named "statinfo".
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   StatStorage     *st;
+   
+   /* everything allocated until the switch back is made in multi_call_memory_ctx */
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   st=palloc( sizeof(StatStorage) );
+   st->cur=0;
+   /* private copy: the caller's stat is not referenced after this function */
+   st->stat=palloc( stat->len );
+   memcpy(st->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+
+/*
+ * Produce the next result row from the state stored in funcctx->user_fctx.
+ *
+ * While entries remain, builds a (word, ndoc, nentry) tuple from C strings
+ * for the entry at the cursor, advances the cursor and returns the tuple
+ * datum.  Once all entries have been returned, frees the stored state and
+ * returns (Datum)0, which the callers treat as "no more rows".
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage     *st;
+   st=(StatStorage*)funcctx->user_fctx;
+
+   if ( st->cur < st->stat->size ) {
+       Datum result;
+       char* values[3];
+       char    ndoc[16];
+       char    nentry[16];
+       StatEntry *entry=STATPTR(st->stat) + st->cur;
+       HeapTuple    tuple;
+
+       /* NOTE(review): ndoc/nentry are uint32 but are printed with %d -- values above INT_MAX would print as negative */
+       values[1]=ndoc;
+       sprintf(ndoc,"%d",entry->ndoc);
+       values[2]=nentry;
+       sprintf(nentry,"%d",entry->nentry);
+       /* the stored word is not NUL-terminated: copy len bytes and terminate */
+       values[0]=palloc( entry->len+1 );
+       memcpy( values[0], STATSTRPTR(st->stat)+entry->pos, entry->len);
+       (values[0])[entry->len]='\0';
+
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       pfree(values[0]);
+       st->cur++;
+       return result;  
+   } else {
+       /* exhausted: release the copy made by ts_setup_firstcall(); user_fctx is left pointing at freed memory */
+       pfree(st->stat);
+       pfree(st);
+   }
+   
+   return (Datum)0;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: expands the tsstat passed as argument 0 into one
+ * row per entry.  The first call copies the statistics into the multi-call
+ * state; every call then emits the next row, or ends the set when
+ * ts_process_call() reports that nothing is left.
+ */
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+/* Cached Oid of the tsvector type; InvalidOid until get_ti_Oid() has run. */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the Oid of the type named "tsvector" in pg_type through SPI and
+ * cache it in tiOid.  Must be called inside an open SPI connection.
+ * Raises an ERROR if the query fails, if no such type exists, or if the
+ * Oid found is invalid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is an unsigned row count, so "< 0" could never fire and
+    * an empty result would lead to reading vals[0] of an empty table.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL text "txt" through an SPI cursor and accumulate word
+ * statistics over its result.
+ *
+ * The query must return exactly one column of type tsvector; otherwise an
+ * ERROR is raised.  Rows are fetched 100 at a time and every non-NULL value
+ * is fed to ts_accum(); a statistics object superseded by a newer one is
+ * freed.  Must be called inside an open SPI connection.
+ *
+ * Returns the accumulated tsstat (empty if the query returned no rows).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   /* validate the result shape once, on the first batch */
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum() hands back either the same object or a new one */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function behind SQL stat(text): on the first call the query
+ * text in argument 0 is executed via SPI and its word statistics are stored
+ * as multi-call state; every call then returns the next statistics row
+ * until none is left.
+ */
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *sql=PG_GETARG_TEXT_P(0);
+       tsstat *collected;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       collected = ts_stat_sql(sql);
+       PG_FREE_IF_COPY(sql,0); 
+       ts_setup_firstcall(funcctx, collected );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One word of the statistics.
+ *   len    - length of the word in bytes
+ *   pos    - offset of the word inside the string area (see STATSTRPTR)
+ *   ndoc   - number of tsvectors the word was seen in
+ *   nentry - accumulated number of positions of the word (a tsvector
+ *            without position data for the word counts as 1)
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Statistics container: a header followed in "data" by "size" StatEntry
+ * items and then by the string area holding the words themselves.
+ *   len  - total size of the object in bytes, header included
+ *   size - number of StatEntry items
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers.  Macro arguments are parenthesized so that expressions
+ * can be passed safely, and the header is read through tsstat* (the
+ * object's real type) rather than through the unrelated tsvector type.
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* total size of a tsstat with x entries and lenstr bytes of strings */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* first StatEntry */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* start of the string area, i.e. one past the last StatEntry */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* size of the string area in bytes */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary (pg_ts_dict rows are keyed by dict_name)
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, plus an array giving each lexeme's position in that string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>              /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator for WordEntryPos items: ascending by position.
+ *
+ * Equal positions compare as 0.  A comparator that reports "greater" for
+ * equal keys is inconsistent (cmp(a,b) and cmp(b,a) both positive), which
+ * qsort() does not allow.  Duplicates are merged later by uniquePos().
+ */
+static int
+comparePos(const void *a, const void *b) {
+   int apos = ((const WordEntryPos *) a)->pos;
+   int bpos = ((const WordEntryPos *) b)->pos;
+
+   if ( apos == bpos )
+       return 0;
+   return ( apos > bpos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicate positions in place.
+ * When the same position occurs more than once the largest weight is kept.
+ * Compaction stops once MAXNUMPOS-1 slots are filled or the position
+ * MAXENTRYPOS-1 has been stored.  Returns the number of positions kept.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *last = a; /* last slot of the compacted prefix */
+   int4 i;
+
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (i = 1; i < l; i++) {
+       WordEntryPos *cur = a + i;
+
+       if ( cur->pos == last->pos ) {
+           /* same position again: only the heavier weight survives */
+           if ( cur->weight > last->weight )
+               last->weight = cur->weight;
+           continue;
+       }
+
+       last++;
+       last->pos = cur->pos;
+       last->weight = cur->weight;
+       if ( last-a >= MAXNUMPOS-1 || last->pos == MAXENTRYPOS-1 )
+           break;
+   }
+   return last + 1 - a;
+}
+
+/* Text buffer that compareentry() resolves entry offsets against; uniqueentry() sets it before sorting */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN items: shorter words sort first,
+ * words of equal length are ordered by strncmp() over their bytes in BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *ea = (const WordEntryIN *) a;
+   const WordEntryIN *eb = (const WordEntryIN *) b;
+
+   if ( ea->entry.len != eb->entry.len )
+       return ( ea->entry.len > eb->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[ea->entry.pos],
+                  &BufferStr[eb->entry.pos],
+                  ea->entry.len);
+}
+
+/*
+ * Sort the l parsed entries in a[] (by length, then bytes) and merge entries
+ * that spell the same word, concatenating and de-duplicating their position
+ * lists.  buf is the text buffer that the entries' pos/len refer to.
+ *
+ * Returns the number of unique entries left at the front of a[].
+ * *outbuflen is set (not added to) to the exact number of bytes the caller
+ * needs for the data area: for every entry SHORTALIGN(len) bytes of text
+ * and, when it has positions, the uint16 counter slot plus the positions.
+ * The previous code added to the caller's incoming value and left out the
+ * counter slot, so the reported size did not match the real layout.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+   int4        need = 0;
+
+   if (l < 1) {
+       *outbuflen = 0;
+       return 0;
+   }
+
+   if (l > 1) {
+       BufferStr = buf;
+       qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+   }
+
+   res = a;
+   ptr = a + 1;
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* *res is complete: dedupe its positions and account for its space */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               need += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           need += SHORTALIGN(res->entry.len);
+           res++;
+           if ( res != ptr )   /* memcpy() must not be given identical regions */
+               memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* duplicate word: move its positions over to *res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]),
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* account for the last unique entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       need += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   need += SHORTALIGN(res->entry.len);
+
+   *outbuflen = need;
+   return res + 1 - a;
+}
+
+/*
+ * States of the gettoken_tsvector() scanner:
+ *   WAITWORD      skipping blanks, looking for the start of a word
+ *   WAITENDWORD   inside an unquoted word
+ *   WAITNEXTCHAR  a backslash was seen; the next character is taken literally
+ *   WAITENDCMPLX  inside a single-quoted word
+ *   WAITPOSINFO   just after the closing quote; ':' introduces positions
+ *   INPOSINFO     a digit starting a position is required
+ *   WAITPOSDELIM  after the start of a position: more digits, a weight, ',' or end
+ */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Double the state->word buffer when fewer than two free bytes remain after
+ * state->curpos, keeping curpos at the same offset in the new buffer.
+ * Expects a variable named "state" (TI_IN_STATE *) in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Scan the next lexeme from state->prsbuf.
+ *
+ * Returns 1 when a lexeme was read: its bytes are in state->word, NUL
+ * terminated, with state->curpos pointing at the terminator.  If position
+ * info followed the lexeme, state->alen is non-zero and state->pos holds
+ * the positions (slot 0 is used as a uint16 counter).  Returns 0 at end of
+ * input.  Malformed input is reported with elog(ERROR).
+ *
+ * When state->oprisdelim is set, operator characters (ISOPERATOR) end an
+ * unquoted word and position info is not parsed.
+ *
+ * Characters handed to isdigit()/tolower()/isspace() are cast to unsigned
+ * char first: passing a negative plain char (8-bit text) is undefined.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           /* escaped character: copy verbatim, then resume the previous state */
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               /* delimiter is left unconsumed for the next call */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1;
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               /* make room for one more position; slot 0 is the counter */
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2;
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           /* remaining digits of the number are skipped; atoi() already read them */
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type: parse the cstring argument into
+ * lexemes (with optional position lists), sort and de-duplicate them with
+ * uniqueentry(), and build the packed tsvector value.
+ *
+ * Offsets are stored in the 20-bit WordEntry.pos field, so any offset that
+ * reaches MAXSTRPOS (1<<20) is rejected; this is checked both for the
+ * scratch buffer and for the final layout, which also contains position data.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array and the scratch text buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /* pos has 20 bits: the largest storable offset is MAXSTRPOS-1 */
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   /* on return buflen is the data-area size reported by uniqueentry() */
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /* the final offset must fit the 20-bit field as well */
+       if (cur - STRPTR(in) >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* counter slot plus positions are copied as one run */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos );
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * Return the number of entries (the size field) of the tsvector argument.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type.
+ *
+ * Each lexeme is written as 'word', followed by ":pos[weight],..." when it
+ * has positions; lexemes are separated by one blank.  Weights 3, 2 and 1
+ * print as A, B and C; weight 0 prints nothing.
+ *
+ * Both the quote and the backslash are escaped with a backslash, because
+ * gettoken_tsvector() treats a backslash inside quotes as an escape; an
+ * unescaped one would make the text read back as a different lexeme.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   /*
+    * Worst-case size: terminator, and per lexeme two quotes, a separator
+    * and every byte escaped.  Per position: up to 5 digits (14-bit value),
+    * a weight letter and a comma, plus one ':' per position list.
+    */
+   lenbuf = 1 /* \0 */;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += 2 /* '' */ + 1 /* space */ + ptr[i].len*2 /* for escape */;
+       if ( ptr[i].haspos )
+           lenbuf += 1 /* : */ + 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'' || *curin == '\\')
+               *curout++ = '\\';
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0:
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD items: by length, then by bytes, then by
+ * position (pos.pos).  Items equal in all three compare as 0 so that the
+ * ordering stays consistent, as qsort() requires.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+
+   if (wa->len == wb->len) {
+       int res = strncmp(wa->word, wb->word, wb->len);
+
+       if ( res==0 ) {
+           if ( wa->pos.pos == wb->pos.pos )
+               return 0;
+           return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+       }
+       return res;
+   }
+   return (wa->len > wb->len) ? 1 : -1;
+}
+
+/*
+ * Sort the l words in a[] and fold repeated words into one entry each.
+ * For every surviving entry pos.pos is replaced by a palloc'd array
+ * pos.apos whose element 0 is the number of positions that follow; alen is
+ * the allocated length of that array.  The word strings of folded
+ * duplicates are pfree'd.  A position is no longer appended once the count
+ * reaches MAXNUMPOS-1 or the last stored position is MAXENTRYPOS-1.
+ * Returns the number of unique words.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *cur,
+              *last;
+   int         firstpos;
+
+   /* a single word needs no sorting; the loop below is then a no-op */
+   if (l != 1)
+       qsort((void *) a, l, sizeof(WORD), compareWORD);
+
+   /* read pos.pos before pos.apos is assigned */
+   firstpos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=firstpos;
+
+   last = a;
+   for (cur = a + 1; cur - a < l; cur++)
+   {
+       if (cur->len == last->len &&
+           strncmp(cur->word, last->word, last->len) == 0)
+       {
+           /* repeated word: drop its text, keep its position if there is room */
+           pfree(cur->word);
+           if ( last->pos.apos[0] < MAXNUMPOS-1 && last->pos.apos[ last->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               if ( last->pos.apos[0]+1 >= last->alen ) {
+                   last->alen*=2;
+                   last->pos.apos=(uint16*)repalloc( last->pos.apos, sizeof(uint16)*last->alen );
+               }
+               last->pos.apos[ last->pos.apos[0]+1 ] = LIMITPOS(cur->pos.pos);
+               last->pos.apos[0]++;
+           }
+           continue;
+       }
+
+       /* new word: start the next output slot */
+       last++;
+       last->len = cur->len;
+       last->word = cur->word;
+       firstpos=LIMITPOS(cur->pos.pos);
+       last->alen=2;
+       last->pos.apos=(uint16*)palloc( sizeof(uint16)*last->alen );
+       last->pos.apos[0]=1;
+       last->pos.apos[1]=firstpos;
+   }
+
+   return last + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * De-duplicates prs->words with uniqueWORD(), then packs the words and their
+ * position lists into a newly palloc'd tsvector.  All positions get weight 0.
+ * The word strings, the per-word position arrays and prs->words itself are
+ * pfree'd.
+ *
+ * Offsets go into the 20-bit WordEntry.pos field, so an offset that reaches
+ * MAXSTRPOS (1<<20) is rejected instead of being truncated.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   /* size of the data area: aligned text plus counter and positions */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /* the largest offset that fits in 20 bits is MAXSTRPOS-1 */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+
+           ptr->haspos=1;
+           /* counter first, then the positions themselves */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text): parse the text with the configuration found by
+ * findcfg() and return the resulting tsvector.  Text that yields no words
+ * gives an empty tsvector (size 0).
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *in = PG_GETARG_TEXT_P(1);
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     prs;
+   tsvector   *out = NULL;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+   PG_FREE_IF_COPY(in, 1);
+
+   if (!prs.curwords) {
+       /* nothing parsed: release the word array and build a header-only value */
+       pfree(prs.words);
+       out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+   } else
+       out = makevalue(&prs);
+
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * to_tsvector(cfg_name, text): resolve the configuration name with
+ * name2id_cfg() and hand over to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfg=PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfg ) );
+   Datum       res;
+
+   res = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsvector(text): call to_tsvector() with the configuration id returned
+ * by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       res;
+
+   res = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * Look up a one-argument function called fname and return the oid of the
+ * first candidate whose argument type is text, or InvalidOid if there is
+ * none.  The candidate list from FuncnameGetCandidates() is pfree'd.
+ */
+static Oid
+findFunc(char *fname) {
+   FuncCandidateList cand, next;
+   Oid found = InvalidOid;
+   List *qualname = makeList1(makeString(fname));
+
+   cand = FuncnameGetCandidates(qualname, 1);
+   freeList(qualname);
+
+   for (; cand; cand = next) {
+       next = cand->next;
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * Rebuilds a tsvector column of the row being inserted or updated.
+ * Trigger arguments: the first names the tsvector column to fill; each of
+ * the following names either a text/varchar/char column to parse or, when
+ * no column of that name exists, a function that is looked up with
+ * findFunc().  Once a function has been found it is applied to the value
+ * of every column named after it (funcoid is not reset inside the loop).
+ * Must be fired BEFORE INSERT or UPDATE, for each row; anything else is
+ * reported with elog(ERROR).
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is going to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* attribute number of the tsvector column to fill */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the function found earlier */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /* no detoasted copy was made: make the comparison below skip pfree */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when it differs from the datum recorded in txt_toasted */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a header-only (size 0) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Descriptor of one lexeme, packed into 32 bits:
+ *   haspos  set when a position list follows the lexeme text
+ *   len     length of the lexeme in bytes
+ *   pos     offset of the lexeme from the start of the string area (STRPTR)
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* first values that no longer fit the len (11 bit) and pos (20 bit) fields */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme, packed into 16 bits:
+ *   weight  2-bit weight (0..3)
+ *   pos     14-bit position
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+/* MAXENTRYPOS: first value that no longer fits the 14-bit pos field */
+#define MAXENTRYPOS    (1<<14)
+/* MAXNUMPOS: bound on the number of positions per lexeme */
+#define MAXNUMPOS  256
+/* clamp a position to the largest storable value, MAXENTRYPOS-1 */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * tsvector value: header (len, size), then "size" WordEntry descriptors,
+ * then the string area holding lexeme text and, for entries with haspos
+ * set, a uint16 position count followed by that many WordEntryPos items.
+ */
+typedef struct
+{
+   int4        len;    /* total size of the value in bytes */
+   int4        size;   /* number of WordEntry descriptors */
+   char        data[1];
+}  tsvector;
+
+/*
+ * Accessors.  Every macro argument is parenthesized so that expression
+ * arguments (e.g. "n+1", "p+1") expand correctly.
+ *   DATAHDRSIZE      size of the len/size header
+ *   CALCDATASIZE     total bytes for x entries plus lenstr bytes of string area
+ *   ARRPTR           first WordEntry
+ *   STRPTR/STRSIZE   start and size of the string area
+ *   _POSDATAPTR      position count slot of entry e (just past its aligned text)
+ *   POSDATALEN       number of positions of entry e, 0 if it has none
+ *   POSDATAPTR       first WordEntryPos of entry e
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Entry as collected by the input parser: the packed descriptor plus a
+ * separately allocated position array (element 0 is used as a uint16 count).
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * Scanner state for gettoken_tsvector():
+ *   prsbuf      current read position in the input string
+ *   word        buffer receiving the current lexeme
+ *   curpos      write position inside word
+ *   len         allocated size of word
+ *   state       current scanner state
+ *   alen        allocated slots in pos; 0 when the lexeme has no positions
+ *   pos         positions of the current lexeme (slot 0 holds the count)
+ *   oprisdelim  when true, operator characters end a word and positions are not parsed
+ */
+typedef struct
+{
+   char       *prsbuf;
+   char       *word;
+   char       *curpos;
+   int4        len;
+   int4        state;
+   int4        alen;
+   WordEntryPos    *pos;
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+/* returns 1 when a lexeme was read into state->word, 0 at end of input */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>              /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the argument that keeps every lexeme
+ * but none of the position lists (haspos is cleared on every entry).
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* string area needed for the lexeme text alone */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char"): return a copy of the tsvector in which every
+ * position carries the given weight.  The weight letter is case-insensitive:
+ * a=3, b=2, c=1, d=0; any other letter raises "Unknown weight".
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* tolower() is only defined for unsigned char values (and EOF) */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare entry a (text in ptra) with entry b (text in ptrb): shorter
+ * lexemes sort first, equal lengths are ordered by strncmp() of the bytes.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position list of
+ * destptr (an entry of dest), shifting each by maxpos and clamping it with
+ * LIMITPOS.  If destptr has no positions yet its counter is started at 0.
+ * Copying stops when the source is exhausted, when the destination holds
+ * MAXNUMPOS positions, or when its last position is already the largest
+ * storable one (MAXENTRYPOS-1).  Sets destptr->haspos if anything was
+ * added and returns the number of positions added.
+ *
+ * NOTE(review): the loop header was truncated in this copy of the patch
+ * ("for(i=0; i"); the condition below is reconstructed from the limits
+ * used elsewhere in the module -- confirm against the original source.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos )
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight;
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1;
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector): merge two tsvectors into one.
+ *
+ * Both inputs are walked in parallel in compareEntry() order.  Positions
+ * coming from the second argument are shifted by maxpos, the largest
+ * position found in the first argument, via add_pos().  A lexeme present in
+ * both inputs gets the first argument's positions followed by the shifted
+ * positions of the second.  The result is allocated with the summed size of
+ * both inputs and its len/size are corrected at the end.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in the first argument */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* upper bound on the result size; size is provisional so STRPTR(out) is usable */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* counter and positions are copied unchanged */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one entry with both position lists */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* counter slot is already counted above; only the added positions advance cur */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* leftover entries of the first argument */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* leftover entries of the second argument, positions shifted */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /* fix up the real entry count; if it shrank, STRPTR(out) moved, so slide the string area down */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- WARNING: be careful!
+-- This script drops all indices, triggers and columns that use the types
+-- defined in tsearch2.sql. Everything runs inside one BEGIN ... END block.
+
+
+-- GiST operator class for tsvector
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators on tsvector / tsquery
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types; CASCADE also removes objects that depend on them
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- remaining functions, dropped explicitly by signature
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the token id
+ * defined in deflex.h. Ids start at 1, so slot 0 is an empty placeholder.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token id defined in
+ * deflex.h (same layout as lex_descr). Slot 0 is an empty placeholder.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Token type ids returned by the default word parser.
+ * Remember: LASTNUM must stay equal to the highest id defined below,
+ * and lex_descr[] / tok_alias[] need one entry per id.
+ */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* per-id description and alias tables, defined in deflex.c */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+#include <stdio.h>     /* FILE, used by start_parse_fh() */
+
+/*
+ * Text of the current token. The object itself is defined (with an
+ * initializer) in parser.l, so the header only declares it.
+ */
+extern char *token;
+/*
+ * Length of the current token as set by the scanner actions.
+ * NOTE(review): this is still a tentative definition in every includer;
+ * no source file visible here defines tokenlen, so it cannot be turned
+ * into an extern declaration from this header alone.
+ */
+int            tokenlen;
+
+/* scanner entry point: returns the next token type id (see deflex.h) */
+int            tsearch2_yylex(void);
+/* start scanning a memory buffer of the given length */
+void       start_parse_str(char *, int);
+/* start scanning a file handle; the int limits the bytes read (0 = no limit) */
+void       start_parse_fh(FILE *, int);
+/* release the scanner buffer after parsing */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: the scanner's fprintf() calls
+ * are redirected to ts_error(ERROR, ...).
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* use the postgres allocation functions in place of malloc/free/realloc */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+/* strdup may already be a macro; replace it with pstrdup either way */
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the text of the current token */
+char *s     = NULL;  /* private copy used to return a WHOLE hyphenated word (or URL) */
+
+YY_BUFFER_STATE buf = NULL; /* buffer being parsed; needed for parsing from a string */
+
+int lrlimit = -1;  /* remaining bytes that may be read from a filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next fread() request issued by YY_INPUT */
+
+/*
+ * Redefine YY_INPUT so that reads can be limited in length.
+ * Interactive buffers are read one line at a time with getc().
+ * Otherwise: lrlimit == 0 reports end of input, lrlimit > 0 caps the
+ * fread() size and is decremented by the requested amount, and a
+ * negative lrlimit reads max_size bytes without any cap.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* exclusive state entered after a whole hyphenated word has been returned */
+%x DELIM  
+/* exclusive states for parsing URLs: URL follows http:// or ftp://, SERVER follows a host with a path */
+%x URL  
+%x SERVER  
+
+/* exclusive states for parsing markup: tags, quoted text in tags, comments, scripts */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* bytes \200-\377 (high bit set) are treated as non-latin letters, e.g. cyrillic koi8 chars */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+/* dot-separated labels ending in an alphabetic label; URI is the path/query character set */
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after parsing: drop the saved copy of the last composite
+ * token (if any) and delete the current scanner buffer.
+ */
+void end_parse() {
+   if (s != NULL) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Start parsing from a string: 'limit' bytes of 'str' are handed to the
+ * scanner. A parse that is still open is finished first.
+ */
+void start_parse_str(char* str, int limit) {
+   if (buf != NULL)
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+/*
+ * Start parsing from a filehandle. A non-zero 'limit' caps the number of
+ * bytes read (see lrlimit / YY_INPUT); zero means unlimited read.
+ * A parse that is still open is finished first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if (buf != NULL)
+       end_parse();
+
+   if ( limit != 0 )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+/* saved SPI plan for the pg_ts_parser lookup; prepared on first use */
+static void *plan_getparser=NULL;
+/* parser used by the *_current functions; set by set_curprs() */
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser whose pg_ts_parser row has
+ * oid 'id': function-call info for the start, nexttoken, end and headline
+ * functions plus the oid of the lextype function.
+ *
+ * *prs is zeroed first. Raises an error through ts_error() when the plan
+ * cannot be prepared, the query fails, or no such parser exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+
+       /* columns 1..5 follow the select list above */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned, so it is printed with %u rather than %d */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/* Cache of parser descriptions that have already been loaded. */
+typedef struct {
+   WParserInfo *last_prs;  /* entry returned by the latest findprs() call */
+   int     len;            /* number of entries in use */
+   int     reallen;        /* number of entries allocated */
+   WParserInfo *list;      /* entries; findprs() keeps them sorted by prs_id */
+   SNMap       name2id_map;    /* name -> id cache used by name2id_prs() */
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget every cached parser: release the name map and the entry array,
+ * then return PList to its all-zero state.
+ */
+void    
+reset_prs(void) {
+   WParserInfo *cached = PList.list;
+
+   freeSNMap( &(PList.name2id_map) );
+   if ( cached != NULL )
+       free(cached);
+
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort/bsearch comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is
+ * unsigned, and a difference truncated to int gets the wrong sign when
+ * the two ids are far apart.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached description of parser 'id', loading it with
+ * init_prs() on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search of
+ * the sorted cache, then a fresh load. init_prs() raises an error when
+ * the parser does not exist; in that case the cache is left as it was.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo key;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   key.prs_id=id;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* last chance: load it, growing the array first when it is full */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /*
+    * Keep last_prs away from the slot until it is completely filled in:
+    * if init_prs() errors out, a later call must not find a zeroed,
+    * half-built entry through the "last used" shortcut.
+    */
+   PList.last_prs=NULL;
+   init_prs(id, &(PList.list[PList.len]));
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+
+   /* qsort changed order, so look the new entry up again */
+   PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return PList.last_prs;
+}
+
+/* saved SPI plan for the name lookup; prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ * Results are remembered in PList.name2id_map; an unknown name raises
+ * an error through ts_error().
+ */
+Oid
+name2id_prs(text *name) {
+   Oid     argtypes[1]={ TEXTOID };
+   Datum   params[1]={ PointerGetDatum(name) };
+   bool    isnull;
+   int     rc;
+   Oid     prsid;
+
+   /* cached from an earlier call? */
+   prsid = findSNMap_t( &(PList.name2id_map), name );
+   if ( prsid != 0 )
+       return prsid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, params, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed == 0 )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       prsid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   SPI_finish();
+
+   addSNMap_t( &(PList.name2id_map), name, prsid );
+   return prsid;
+}
+
+
+/******sql-level interface******/
+/* Per-query state of the token_type* set-returning functions. */
+typedef struct {
+   int     cur;        /* index of the next list entry to return */
+   LexDescr    *list;  /* entries from the parser's lextype function; lexid 0 ends the list */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type* functions: ask the parser's
+ * lextype function for its token descriptions and store them, together
+ * with the "tokentype" tuple machinery, in the multi-call context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo     *parser = findprs(prsid);
+   MemoryContext   savedctx;
+   TypeStorage     *state;
+   TupleDesc       desc;
+
+   /* everything below must live until the last row has been returned */
+   savedctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   state->cur = 0;
+   state->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) )
+   );
+   funcctx->user_fctx = (void*)state;
+
+   desc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(savedctx);
+}
+
+/*
+ * Per-call worker of the token_type* functions. Builds the tuple
+ * (id, alias, description) for the current list entry, frees that
+ * entry's strings and advances. When the list is exhausted the state
+ * is released and (Datum)0 is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *state = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char        *fields[3];
+   char        idbuf[16];
+   HeapTuple   tup;
+   Datum       row;
+
+   /* no list, or terminator (lexid == 0) reached: clean up and stop */
+   if ( !state->list || !state->list[state->cur].lexid ) {
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   sprintf(idbuf, "%d", entry->lexid);
+   fields[0] = idbuf;
+   fields[1] = entry->alias;
+   fields[2] = entry->descr;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   row = TupleGetDatum(funcctx->slot, tup);
+
+   /* the strings have been copied into the tuple */
+   pfree(entry->alias);
+   pfree(entry->descr);
+   state->cur++;
+
+   return row;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: token types of the parser whose oid is
+ * argument 0, one row per call.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *srf;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) { 
+       srf = SRF_FIRSTCALL_INIT();
+       setup_firstcall(srf, PG_GETARG_OID(0) );
+   }
+
+   srf = SRF_PERCALL_SETUP();
+   row = process_call(srf);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(srf);
+   }
+   SRF_RETURN_NEXT(srf, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: token types of the parser named by
+ * argument 0, one row per call.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *srf;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0); 
+
+       srf = SRF_FIRSTCALL_INIT();
+       setup_firstcall(srf, name2id_prs( prsname ) );
+       PG_FREE_IF_COPY(prsname,0);
+   }
+
+   srf = SRF_PERCALL_SETUP();
+   row = process_call(srf);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(srf);
+   }
+   SRF_RETURN_NEXT(srf, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: token types of the current parser. When no
+ * parser has been selected yet, the one named "default" becomes current.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *srf;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       srf = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           text *defname = char2text("default");
+
+           current_parser_id = name2id_prs( defname );
+       }
+       setup_firstcall(srf, current_parser_id );
+   }
+
+   srf = SRF_PERCALL_SETUP();
+   row = process_call(srf);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(srf);
+   }
+   SRF_RETURN_NEXT(srf, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+/*
+ * Make the parser with the given oid the current one. findprs() is
+ * called first, so an unknown oid raises an error before anything
+ * is changed.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        Oid prsid = PG_GETARG_OID(0);
+
+        findprs(prsid);
+        current_parser_id = prsid;
+
+        PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+/*
+ * Make the parser with the given name the current one: the name is
+ * resolved to an oid and handed on to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *name = PG_GETARG_TEXT_P(0);
+        Oid   prsid = name2id_prs(name);
+
+        DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+/* One parsed token: its type id and a NUL-terminated copy of its text. */
+typedef struct {
+   int type;
+   char    *lexem;
+} LexemEntry;
+
+/* Per-query state of the parse* set-returning functions. */
+typedef struct {
+   int cur;            /* index of the next entry to return */
+   int len;            /* allocated size while collecting, then the number of entries */
+   LexemEntry  *list;  /* collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse* functions: run the parser
+ * 'prsid' over 'txt' and collect every token (type id plus a private
+ * copy of its text) in the multi-call memory context, together with
+ * the "tokenout" tuple machinery.
+ *
+ * prsid is an Oid, matching findprs() and setup_firstcall(); every
+ * caller already passes an Oid.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text payload (varlena header excluded) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* a token type of 0 marks the end of input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* double the array when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* lex is not NUL-terminated: copy llen bytes and terminate */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the number of tokens and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker of the parse* functions. Builds the tuple
+ * (type id, lexeme) for the current token, frees the token text and
+ * advances. When all tokens have been returned the state is released
+ * and (Datum)0 is returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *state = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char        *fields[2];
+   char        typebuf[16];
+   HeapTuple   tup;
+   Datum       row;
+
+   /* nothing left: clean up and stop */
+   if ( state->cur >= state->len ) {
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   sprintf(typebuf, "%d", entry->type);
+   fields[0] = typebuf;
+   fields[1] = entry->lexem;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   row = TupleGetDatum(funcctx->slot, tup);
+
+   /* the text has been copied into the tuple */
+   pfree(entry->lexem);
+   state->cur++;
+
+   return row;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: tokens of the text in argument 1, produced by
+ * the parser whose oid is argument 0, one row per call.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *srf;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(1); 
+
+       srf = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(srf, PG_GETARG_OID(0), input );
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   srf = SRF_PERCALL_SETUP();
+   row = prs_process_call(srf);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(srf);
+   }
+   SRF_RETURN_NEXT(srf, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: tokens of the text in argument 1, produced by
+ * the parser named by argument 0, one row per call.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *srf;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0); 
+       text *input = PG_GETARG_TEXT_P(1); 
+
+       srf = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(srf, name2id_prs( prsname ), input );
+       PG_FREE_IF_COPY(prsname,0);
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   srf = SRF_PERCALL_SETUP();
+   row = prs_process_call(srf);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(srf);
+   }
+   SRF_RETURN_NEXT(srf, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: tokens of the text in argument 0, produced by
+ * the current parser. When no parser has been selected yet, the one
+ * named "default" becomes current.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *srf;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(0); 
+
+       srf = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           text *defname = char2text("default");
+
+           current_parser_id = name2id_prs( defname );
+       }
+       prs_setup_firstcall(srf, current_parser_id, input );
+       PG_FREE_IF_COPY(input,0);
+   }
+
+   srf = SRF_PERCALL_SETUP();
+   row = prs_process_call(srf);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(srf);
+   }
+   SRF_RETURN_NEXT(srf, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * headline(cfg oid, text, query [, options]): parse the text with the
+ * given configuration, let the headline function of that configuration's
+ * parser process the parsed words, and return the text built by genhl().
+ * The options argument is optional and may be a NULL pointer.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo   *cfg;
+   text        *body;
+   QUERYTYPE   *query;
+   text        *options = NULL;
+   WParserInfo *parser;
+   HLPRSTEXT   hl;
+   text        *result;
+
+   cfg = findcfg(PG_GETARG_OID(0));
+   body = PG_GETARG_TEXT_P(1);
+   query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   if ( PG_NARGS()>3 && PG_GETARG_POINTER(3) )
+       options = PG_GETARG_TEXT_P(3);
+   parser = findprs(cfg->prs_id);
+
+   /* start with room for 32 words */
+   memset(&hl, 0, sizeof(HLPRSTEXT));
+   hl.lenwords = 32;
+   hl.words = (HLWORD *) palloc(sizeof(HLWORD) * hl.lenwords);
+   hlparsetext(cfg, &hl, query, VARDATA(body), VARSIZE(body) - VARHDRSZ);
+
+   FunctionCall3(
+       &(parser->headline_info),
+       PointerGetDatum(&hl),
+       PointerGetDatum(options),
+       PointerGetDatum(query)
+   );
+
+   result = genhl(&hl);
+
+   PG_FREE_IF_COPY(body,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( options )
+       PG_FREE_IF_COPY(options,3);
+   pfree(hl.words);
+   pfree(hl.startsel);
+   pfree(hl.stopsel);
+
+   PG_RETURN_POINTER(result);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+/*
+ * headline(cfg name, text, query [, options]): resolve the configuration
+ * name and forward to headline(). A missing options argument is passed
+ * on as a NULL pointer.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text    *cfgname = PG_GETARG_TEXT_P(0);
+   Datum   cfgid = ObjectIdGetDatum(name2id_cfg( cfgname ) );
+   Datum   optarg;
+   Datum   result;
+
+   if ( PG_NARGS()>3 )
+       optarg = PG_GETARG_DATUM(3);
+   else
+       optarg = PointerGetDatum(NULL);
+
+   result = DirectFunctionCall4(
+       headline,
+       cfgid,
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       optarg
+   );
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);   
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+/*
+ * headline(text, query [, options]): forward to headline() using the
+ * configuration returned by get_currcfg(). A missing options argument
+ * is passed on as a NULL pointer.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum   cfgid = ObjectIdGetDatum(get_currcfg());
+   Datum   optarg;
+   Datum   result;
+
+   if ( PG_NARGS()>2 )
+       optarg = PG_GETARG_DATUM(2);
+   else
+       optarg = PointerGetDatum(NULL);
+
+   result = DirectFunctionCall4(
+       headline,
+       cfgid,
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       optarg
+   );
+
+   PG_RETURN_DATUM(result);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* Cached description of one parser, loaded from pg_ts_parser by init_prs(). */
+typedef struct {
+   Oid prs_id;                 /* oid of the pg_ts_parser row */
+   FmgrInfo start_info;        /* prs_start function */
+   FmgrInfo getlexeme_info;    /* prs_nexttoken function */
+   FmgrInfo end_info;          /* prs_end function */
+   FmgrInfo headline_info;     /* prs_headline function */
+   Oid lextype;                /* oid of the prs_lextype function */
+   void *prs;                  /* value returned by the start function, passed to nexttoken/end */
+} WParserInfo;
+
+/* fill *prs for parser 'id'; raises an error if there is no such parser */
+void init_prs(Oid id, WParserInfo *prs);
+/* cached lookup by oid; loads the parser on first use */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* drop all cached parser information */
+void   reset_prs(void);
+
+
+/* One token type as reported by a parser's lextype function; lexid 0 ends a list. */
+typedef struct {
+   int lexid;
+   char    *alias;
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * Return a palloc'd array describing the token types 1..LASTNUM of the
+ * default parser.  Each entry carries the type id together with copies of
+ * its alias (tok_alias[]) and description (lex_descr[]).  The array holds
+ * LASTNUM+1 entries; the last one only has lexid set (to 0) and serves as
+ * the terminator.
+ */
+Datum
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr    *descr;
+   int k;
+
+   descr=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+
+   /* slot k describes token type k+1 */
+   for(k=0;k<LASTNUM;k++) {
+       LexDescr    *d=&descr[k];
+
+       d->lexid = k+1;
+       d->alias = pstrdup(tok_alias[k+1]);
+       d->descr = pstrdup(lex_descr[k+1]);
+   }
+
+   /* terminator entry */
+   descr[LASTNUM].lexid=0;
+
+   PG_RETURN_POINTER(descr);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+/*
+ * Begin parsing: hand the text buffer (argument 0) and its length
+ * (argument 1) to start_parse_str().  Always returns a NULL pointer.
+ */
+Datum
+prsd_start(PG_FUNCTION_ARGS) {
+   char    *str=(char*)PG_GETARG_POINTER(0);
+   int len=PG_GETARG_INT32(1);
+
+   start_parse_str( str, len );
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+/*
+ * Fetch the next token from the lexer.  Argument 0 (the parser state) is
+ * unused; arguments 1 and 2 are output pointers that receive the token
+ * text and its length (taken from the lexer's `token' and `tokenlen'
+ * variables).  The return value is whatever tsearch2_yylex() returned.
+ */
+Datum
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char    **tokout=(char**)PG_GETARG_POINTER(1);
+   int *lenout=(int*)PG_GETARG_POINTER(2);
+   int toktype;
+
+   /* run the lexer first: it is what updates token/tokenlen */
+   toktype=tsearch2_yylex();
+
+   *tokout = token;
+   *lenout = tokenlen;
+   PG_RETURN_INT32(toktype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+/*
+ * Finish parsing: calls end_parse() and returns void.  The parser-state
+ * argument is not used (see the commented-out line below).
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse();
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class predicates used by the headline code.  The numbers are the
+ * default parser's token type ids as listed by token_type('default'):
+ *   5 url, 7 sfloat, 8 version, 12 blank, 13 tag, 14 http, 15 hword,
+ *   16 lhword, 17 nlhword, 20 float, 21 int, 22 uint, 23 entity.
+ */
+/* blank only */
+#define LEAVETOKEN(x)  ( (x)==12 )
+/* url and the three hyphenated-word types */
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+/* blank only (same test as LEAVETOKEN) */
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+/* tag, http, blank, entity */
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* url, tag and hyphenated words; prsd_headline() sets `replace' on these */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* tokens that do not count toward the headline's word total */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* tokens prsd_headline() treats as a poor place to end a headline */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* A window of headline words handed to TS_execute() through checkval. */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * TS_execute() callback: reports whether the query item `val' is attached
+ * to any of the `len' words of the hlCheck window passed in `checkval'.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *chk=(hlCheck*)checkval;
+   int k=0;
+
+   while( k<chk->len ) {
+       if ( chk->words[k].item==val )
+           return true;
+       k++;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next "cover": a span prs->words[*p .. *q] that satisfies the
+ * query.
+ *
+ * On entry *p is the word index to start searching from.  On success the
+ * function returns true with *p / *q set to the first and last word of the
+ * cover; otherwise it returns false.
+ *
+ * Steps:
+ *  1. for every VAL item of the query find its first occurrence at or
+ *     after the start position; *q becomes the largest such index;
+ *  2. for every VAL item find its last occurrence in [start, *q]; *p
+ *     becomes the smallest such index;
+ *  3. evaluate the query against the words in [*p, *q]; if it does not
+ *     match, retry starting one word further.
+ *
+ * Note that *q==0 is used as the "nothing found" marker, so a match located
+ * only at word index 0 is treated as no match.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* step 1: right edge = furthest "first occurrence" of any query word */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q==0 )
+       return false;
+
+   /* step 2: left edge = nearest-to-start "last occurrence" before *q */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   /* step 3: verify the candidate span really satisfies the query */
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+/*
+ * Choose the fragment of the parsed document to show as a headline and
+ * mark its words.
+ *
+ * Arguments: 0 - HLPRSTEXT* with the parsed words, 1 - options text (may
+ * be NULL), 2 - the query.  Recognised options (case-insensitive keys):
+ * MaxWords, MinWords, ShortWord, StartSel, StopSel.  Defaults are
+ * MinWords=15, MaxWords=35, ShortWord=3, StartSel="<b>", StopSel="</b>".
+ *
+ * Every cover reported by hlCover() is stretched or shrunk toward the
+ * MinWords..MaxWords range, and the candidate containing the most distinct
+ * query words that also ends on an acceptable token is kept.  If no cover
+ * exists, the fragment is taken from the start of the document, up to
+ * MinWords words long.  Words of the chosen fragment get in=1, plus
+ * selected/skip/replace flags as appropriate.  Returns prs.
+ */
+Datum
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* from opt + start and end tag */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       /* beste is only read when bestlen>=0, i.e. after a best was stored */
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Keep this candidate if it is the first one, if it has more query
+        * words and a good ending, or if it has a good ending while the
+        * current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: fall back to the beginning of the document */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* default highlighting tags when the options did not set them */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom}}}}}}
+extend₁♦₁₂
+upward₁♦₁₂
+for₁♦₁₂
+well₁♦₁₂
+over₁♦₁₂
+100₂₂♦₁₂
+>feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')

+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and

+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+}

diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html

new file mode 100644 (file)

index 0000000..df0faa4


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-ref.html
@@ -0,0 +1,448 @@
+
+
+
+
+tsearch2 reference
+
+
+The tsearch2 Reference
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Reference documents the user types and functions
+of the tsearch2 module for PostgreSQL.
+An introduction to the module is provided
+by the tsearch2 Guide,
+a companion document to this one.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+Vectors and Queries
+
+Vectors and queries both store lexemes,
+but for different purposes.
+A tsvector stores the lexemes
+of the words that are parsed out of a document,
+and can also remember the position of each word.
+A tsquery specifies a boolean condition among lexemes.
+
+Any of the following functions with a configuration argument
+can use either an integer id or textual ts_name
+to select a configuration;
+if the option is omitted, then the current configuration is used.
+For more information on the current configuration,
+read the next section on Configurations.
+
+Vector Operations
+
+
+
+ to_tsvector( [configuration,]

+ document TEXT) RETURNS tsvector
+
+ Parses a document into tokens,
+ reduces the tokens to lexemes,
+ and returns a tsvector which lists the lexemes
+ together with their positions in the document.
+ For the best description of this process,
+ see the section on Parsing and Stemming
+ in the accompanying tsearch2 Guide.
+
+ strip(vector tsvector) RETURNS tsvector
+
+ Return a vector which lists the same lexemes
+ as the given vector,
+ but which lacks any information
+ about where in the document each lexeme appeared.
+ While the returned vector is thus useless for relevance ranking,
+ it will usually be much smaller.
+
+ setweight(vector tsvector, letter) RETURNS tsvector
+
+ This function returns a copy of the input vector
+ in which every location has been labelled
+ with either the letter
+ 'A', 'B', or 'C',
+ or the default label 'D'
+ (which is the default with which new vectors are created,
+ and as such is usually not displayed).
+ These labels are retained when vectors are concatenated,
+ allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+
+ vector1 || vector2
+
+ concat(vector1 tsvector, vector2 tsvector)

+ RETURNS tsvector
+
+ Returns a vector which combines the lexemes and position information
+ in the two vectors given as arguments.
+ Position weight labels (described in the previous paragraph)
+ are retained intact during the concatenation.
+ This has at least two uses.
+ First,
+ if some sections of your document
+ need be parsed with different configurations than others,
+ you can parse them separately
+ and concatenate the resulting vectors into one.
+ Second,
+ you can weight words from some sections of your document
+ more heavily than those from others by:
+ parsing the sections into separate vectors;
+ assigning the vectors different position labels
+ with the setweight() function;
+ concatenating them into a single vector;
+ and then providing a weights argument
+ to the rank() function
+ that assigns different weights to positions with different labels.
+
+ tsvector_size(vector tsvector) RETURNS INT4
+
+ Returns the number of lexemes stored in the vector.
+
+ text::tsvector RETURNS tsvector
+
+ Directly casting text to a tsvector
+ allows you to directly inject lexemes into a vector,
+ with whatever positions and position weights you choose to specify.
+ The text should be formatted
+ like the vector would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Query Operations
+
+
+
+ to_tsquery( [configuration,]

+ querytext text) RETURNS tsquery
+
+ Parses a query,
+ which should be single words separated by the boolean operators
+ “&” and,
+ “|” or,
+ and “!” not,
+ which can be grouped using parentheses.
+ Each word is reduced to a lexeme using the current
+ or specified configuration.
+
+
+ querytree(query tsquery) RETURNS text
+
+ This might return a textual representation of the given query.
+
+ text::tsquery RETURNS tsquery
+
+ Directly casting text to a tsquery
+ allows you to directly inject lexemes into a query,
+ with whatever positions and position weight flags you choose to specify.
+ The text should be formatted
+ like the query would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Configurations
+
+A configuration specifies all of the equipment necessary
+to transform a document into a tsvector:
+the parser that breaks its text into tokens,
+and the dictionaries which then transform each token into a lexeme.
+Every call to to_tsvector() (described above)
+uses a configuration to perform its processing.
+Three configurations come with tsearch2:
+
+
+default — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the simple dictionary for all others.
+default_russian — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the ru_stem Russian Snowball dictionary for all others.
+simple — Processes both words and numbers
+ with the simple dictionary,
+ which neither discards any stop words nor alters them.
+
+
+The tsearch2 module initially chooses your current configuration
+by looking for your current locale in the locale field
+of the pg_ts_cfg table described below.
+You can manipulate the current configuration yourself with these functions:
+
+
+
+ set_curcfg( id INT | ts_name TEXT

+  ) RETURNS VOID
+
+ Set the current configuration used by to_tsvector
+ and to_tsquery.
+
+ show_curcfg() RETURNS INT4
+
+ Returns the integer id of the current configuration.
+
+
+
+Each configuration is defined by a record in the pg_ts_cfg table:
+
+create table pg_ts_cfg (
+   id      int not  null primary key,
+   ts_name     text not null,
+   prs_name    text not null,
+   locale      text
+);
+
+The id and ts_name are unique values
+which identify the configuration;
+the prs_name specifies which parser the configuration uses.
+Once this parser has split document text into tokens,
+the type of each resulting token —
+or, more specifically, the type's lex_alias
+as specified in the parser's lexem_type() table —
+is searched for together with the configuration's ts_name
+in the pg_ts_cfgmap table:
+
+create table pg_ts_cfgmap (
+   ts_name     text not null,
+   lex_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,lex_alias)
+);
+
+Those tokens whose types are not listed are discarded.
+The remaining tokens are assigned integer positions,
+starting with 1 for the first token in the document,
+and turned into lexemes with the help of the dictionaries
+whose names are given in the dict_name array for their type.
+These dictionaries are tried in order,
+stopping either with the first one to return a lexeme for the token,
+or discarding the token if no dictionary returns a lexeme for it.
+
+Parsers
+
+Each parser is defined by a record in the pg_ts_parser table:
+
+create table pg_ts_parser (
+   prs_id      int not null primary key,
+   prs_name    text not null,
+   prs_start   oid not null,
+   prs_getlexem    oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+);
+
+The prs_id and prs_name uniquely identify the parser,
+while prs_comment usually describes its name and version
+for the reference of users.
+The other items identify the low-level functions
+which make the parser operate,
+and are only of interest to someone writing a parser of their own.
+
+The tsearch2 module comes with one parser named default
+which is suitable for parsing most plain text and HTML documents.
+
+Each parser argument below
+must designate a parser with either an integer prs_id
+or a textual prs_name;
+the current parser is used when this argument is omitted.
+
+
+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID
+
+ Selects a current parser
+ which will be used when any of the following functions
+ are called without a parser as an argument.
+
+ CREATE FUNCTION lexem_type(

+  [ parser ]
+  ) RETURNS SETOF lexemtype
+
+ Returns a table which defines and describes
+ each kind of token the parser may produce as output.
+ For each token type the table gives the lexid
+ which the parser will label each token of that type,
+ the alias which names the token type,
+ and a short description descr for the user to read.
+
+ CREATE FUNCTION parse(

+  [ parser, ] document TEXT
+  ) RETURNS SETOF lexemtype
+
+ Parses the given document and returns a series of records,
+ one for each token produced by parsing.
+ Each token includes a lexid giving its type
+ and a lexem which gives its content.
+
+
+Dictionaries
+
+Dictionaries take textual tokens as input,
+usually those produced by a parser,
+and return lexemes which are usually some reduced form of the token.
+Among the dictionaries which come installed with tsearch2 are:
+
+
+simple simply folds uppercase letters to lowercase
+ before returning the word.
+en_stem runs an English Snowball stemmer on each word
+ that attempts to reduce the various forms of a verb or noun
+ to a single recognizable form.
+ru_stem runs a Russian Snowball stemmer on each word.
+
+
+Each dictionary is defined by an entry in the pg_ts_dict table:
+
+CREATE TABLE pg_ts_dict (
+   dict_id     int not null primary key,
+   dict_name   text not null,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lemmatize  oid not null,
+   dict_comment    text
+);
+
+The dict_id and dict_name
+serve as unique identifiers for the dictionary.
+The meaning of the dict_initoption varies among dictionaries,
+but for the built-in Snowball dictionaries
+it specifies a file from which stop words should be read.
+The dict_comment is a human-readable description of the dictionary.
+The other fields are internal function identifiers
+useful only to developers trying to implement their own dictionaries.
+
+The argument named dictionary
+in each of the following functions
+should be either an integer dict_id or a textual dict_name
+identifying which dictionary should be used for the operation;
+if omitted then the current dictionary is used.
+
+
+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID
+
+ Selects a current dictionary for use by functions
+ that do not select a dictionary explicitly.
+
+ CREATE FUNCTION lexize(

+ [ dictionary, ] word text)
+ RETURNS TEXT[]
+
+ Reduces a single word to a lexeme.
+ Note that lexemes are arrays of zero or more strings,
+ since in some languages there might be several base words
+ from which an inflected form could arise.
+
+
+Ranking
+
+Ranking attempts to measure how relevant documents are to particular queries
+by inspecting the number of times each search word appears in the document,
+and whether different search terms occur near each other.
+Note that this information is only available in unstripped vectors —
+ranking functions will only return a useful result
+for a tsvector which still has position information!
+
+Both of these ranking functions
+take an integer normalization option
+that specifies whether a document's length should impact its rank.
+This is often desirable,
+since a hundred-word document with five instances of a search word
+is probably more relevant than a thousand-word document with five instances.
+The option can have the values:
+
+
+0 (the default) ignores document length.
+1 divides the rank by the logarithm of the length.
+2 divides the rank by the length itself.
+
+
+The two ranking functions currently available are:
+
+
+
+ CREATE FUNCTION rank(

+  [ weights float4[], ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS,
+ and offers the ability to weight word instances more heavily
+ depending on how you have classified them.
+ The weights specify how heavily to weight each category of word:
+ 
+ {D-weight, A-weight, B-weight, C-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(

+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or less.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+


diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b


--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | [email protected]
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | [email protected]
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 |  
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 |  
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 |  
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 |  
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+


diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496


--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+# Makefile template for a dictionary generated by gendict's config.sh.
+# config.sh fills in the CFG_* placeholders below with sed and writes the
+# result into the new dictionary's directory under contrib/.
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+# Left empty on purpose: config.sh replaces this whole line with the -I
+# options for ../tsearch2 (plus ../tsearch2/snowball for Snowball stemmers).
+PG_CPPFLAGS =
+# Link against the tsearch2 archive built in the sibling contrib directory.
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+
+include $(top_srcdir)/contrib/contrib-global.mk


diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7


--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility helps people create dictionaries for the contrib/tsearch2
+module. In particular, it has built-in support for Snowball stemmers.
+
+The programming API for tsearch2 dictionaries is described in the tsearch2
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument of the -p option must be *the same* as the name
+      of the stemming function in stem.c (without the _stem suffix).
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample portuguese words with the stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+   Note that the dictionary and the corresponding entry in the tsearch
+   configuration are now installed, and you may modify them using plain
+   SQL commands, for example, to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercase input word and remove it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please, check Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for additional information about "Gendict tutorial" and dictionaries.
\ No newline at end of file


diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542


--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRL/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+# Defaults for every setting that can be changed from the command line.
+dictname=
+stemmode=no
+verbose=no
+cfile=
+hfile=
+dir= 
+hasinit=no
+comment=
+prefix=
+
+# Parse the command line: -n, -c, -C, -h, -d and -p take an argument,
+# -v, -i and -s are plain flags.  An unknown option prints the usage text,
+# which also ends the script.
+while getopts n:c:C:h:d:p:vis opt
+do
+   case "$opt" in
+       v) verbose=yes;;
+       s) stemmode=yes;;
+       i) hasinit=yes;;
+       n) dictname="$OPTARG";;
+       c) cfile="$OPTARG";;
+       h) hfile="$OPTARG";;
+       d) dir="$OPTARG";;
+       C) comment="$OPTARG";;
+       p) prefix="$OPTARG";;
+       \?) usage;;
+   esac
+done
+
+[ ${#dictname} -eq 0 ] && usage
+
+dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'`
+
+if [ $stemmode = "yes" ] ; then 
+   [ ${#prefix} -eq 0 ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi 
+
+[ ${#dir}   -eq 0 ] && dir="dict_$dictname"
+
+if [ ${#comment} -eq 0 ]; then
+   comment=null
+else
+   comment="'$comment'"
+fi
+
+ofile=
+for f in $cfile
+do
+   f=` echo $f | sed 's#c$#o#'`
+   ofile="$ofile $f"
+done
+
+if [ $stemmode = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+if [ $verbose = "yes" ]; then
+   echo Dictname: "'"$dictname"'"
+   echo Snowball stemmer: $stemmode
+   echo Has init method: $hasinit
+   [ $stemmode = "yes" ] && echo Function prefix: $prefix 
+   echo Source files: $cfile
+   echo Header files: $hfile
+   echo Object files: $ofile
+   echo Comment: $comment
+   echo Directory: ../../$dir
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build directory...  '
+if [ ! -d ../../$dir ]; then
+   if ! mkdir ../../$dir ; then 
+       echo "Can't create directory ../../$dir"
+       exit 1
+   fi 
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+else
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+fi
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+   [ $verbose = "yes" ] && echo -n 'Copy source and header files...  '
+   if [ ${#cfile} -ne 0 ] ; then
+       if ! cp $cfile ../../$dir ; then 
+           echo "Cant cp all or one of files: $cfile"
+           exit 1
+       fi
+   fi
+   if [ ${#hfile} -ne 0 ] ; then 
+       if ! cp $hfile ../../$dir ; then 
+               echo "Cant cp all or one of files: $hfile"
+           exit 1
+       fi
+   fi
+   [ $verbose = "yes" ] && echo ok
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build sub-include header...  '
+echo -n > ../../$dir/subinclude.h 
+for i in $hfile
+do
+   echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+if  [ $stemmode = "yes" ] ; then 
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   [ $verbose = "yes" ] && echo -n 'Build dictinonary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else 
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi 
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n "Build README.$dictname...  "
+if  [ $stemmode = "yes" ] ; then
+   echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname
+else
+   echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname
+fi
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+


diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/*
+ * Per-dictionary state allocated and filled in by the init function:
+ *   z        - Snowball environment obtained from the stemmer's create_env
+ *   stoplist - stop-word list, loaded only when an init option is passed
+ *   stem     - pointer to the stemmer's stem function
+ */
+typedef struct {
+   struct SN_env *z;
+   StopList    stoplist;
+   int (*stem)(struct SN_env * z);
+} DictSnowball;
+
+
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * Init method of the Snowball dictionary.
+ *
+ * Argument 0 is an optional text value handed to readstoplist(); when it
+ * is NULL the stop-word list stays empty.  Returns a pointer to a newly
+ * malloc'd DictSnowball holding the stop list, the Snowball environment
+ * and the stem function.  Raises ERROR "No memory" when either the state
+ * structure or the Snowball environment cannot be allocated.
+ */
+Datum 
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+       
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       /*
+        * elog(ERROR) does not return, and d comes from malloc() rather
+        * than palloc(), so it must be released by hand here or it leaks.
+        */
+       freestoplist(&(d->stoplist));
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+


diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+HASINIT /* Per-dictionary state built by dinit_CFG_MODNAME(): just the stop-word list. */
+HASINIT typedef struct {
+HASINIT    StopList    stoplist;
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT /*
+HASINIT  * Dictionary init function (CFG_MODNAME is a generator placeholder).
+HASINIT  * Allocates a zeroed DictExample with malloc() and, when argument 0 is
+HASINIT  * not NULL, loads and sorts the stop-word list it names.  Returns the
+HASINIT  * DictExample pointer; raises ERROR if the allocation fails.
+HASINIT  */
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT 
+HASINIT    if ( !d )
+HASINIT        elog(ERROR, "No memory");
+HASINIT    memset(d,0,sizeof(DictExample));
+HASINIT 
+HASINIT    d->stoplist.wordop=lowerstr;
+HASINIT    
+HASINIT    /* Your INIT code */
+HASINIT    
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+HASINIT        text       *in = PG_GETARG_TEXT_P(0);
+HASINIT        readstoplist(in, &(d->stoplist));
+HASINIT        sortstoplist(&(d->stoplist));
+HASINIT        PG_FREE_IF_COPY(in, 0);
+HASINIT    }
+HASINIT 
+HASINIT    PG_RETURN_POINTER(d);
+HASINIT }
+
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+/*
+ * Lexize function (CFG_MODNAME is a generator placeholder).
+ *
+ * Argument 1 points at the input word and argument 2 is its length; the
+ * word is copied with pnstrdup().  The result is a palloc'ed array of two
+ * char pointers, always terminated by NULL in slot 1.  Slot 0 is the copied
+ * word, or (HASINIT variant only) NULL when the word is empty or found in
+ * the stop list of the dictionary passed as argument 0.
+ */
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+   char    **res=palloc(sizeof(char*)*2);
+
+   /* Your INIT dictionary code */
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0]=NULL;
+HASINIT    } else 
+       res[0]=txt;
+   res[1]=NULL;
+
+   PG_RETURN_POINTER(res);
+}


diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842


--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+-- Registration script template.  CFG_MODNAME and CFG_COMMENT are placeholders,
+-- and lines prefixed with HASINIT / NOINIT / ISSNOWBALL / NOSNOWBALL are
+-- presumably kept or dropped by the generator script depending on the kind of
+-- dictionary being built (TODO confirm against the generator).
+SET search_path = public;
+BEGIN;
+
+-- Init function: takes one text argument, returns internal.
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+-- Module-specific lexize function; only emitted for non-Snowball dictionaries.
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+-- Register the dictionary.  Values are supplied positionally; functions are
+-- looked up in pg_proc by name only.
+-- NOTE(review): confirm the column order against the pg_ts_dict definition.
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;


diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+/*
+ * Version-1 call-convention declarations and prototypes for every
+ * SQL-callable function defined in this file.
+ */
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/* Key (as GISTTYPE *) of the pos'th GISTENTRY stored in the varlena 'vec'. */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/* Number of set bits in one byte: sum of its eight individual bits. */
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/*
+ * Stub: always raises ERROR "Not implemented".  The return statement only
+ * follows the elog() call and yields a zero Datum.
+ */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * Stub: always raises ERROR "Not implemented".  The return statement only
+ * follows the elog() call and yields a zero Datum.
+ */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * qsort() comparator: orders two int4 values ascending.
+ */
+static int
+compareint(const void *a, const void *b)
+{
+   int4        lhs = *((int4 *) a);
+   int4        rhs = *((int4 *) b);
+
+   if (lhs > rhs)
+       return 1;
+   if (lhs < rhs)
+       return -1;
+   return 0;
+}
+
+/*
+ * Sort the l-element array 'a' in place and squeeze out duplicate values.
+ *
+ * Returns the number of distinct values, which occupy the front of 'a'.
+ * Arrays of zero or one element are returned untouched; without the
+ * zero-length guard the function would report one (garbage) element for an
+ * empty array.
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+   int4       *ptr,
+              *res;
+
+   if (l <= 1)
+       return l;
+
+   ptr = res = a;
+
+   qsort((void *) a, l, sizeof(int4), compareint);
+
+   /* res trails ptr and marks the last distinct value kept so far */
+   while (ptr - a < l)
+       if (*ptr != *res)
+           *(++res) = *ptr++;
+       else
+           ptr++;
+   return res + 1 - a;
+}
+
+/*
+ * Build a bit signature from an array key: clear 'sign', then set the bit
+ * selected by HASH() for every element of a's array.
+ */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+   int4        remaining = ARRNELEM(a);
+   int4       *cur = GETARR(a);
+
+   MemSet((void *) sign, 0, sizeof(BITVEC));
+   for (; remaining > 0; remaining--, cur++)
+       HASH(sign, *cur);
+}
+
+/*
+ * GiST compress method.
+ *
+ * Leaf entry (a tsvector): build an ARRKEY holding the sorted, de-duplicated
+ * crc32 of every word; if that key is larger than TOAST_INDEX_TARGET it is
+ * replaced by a fixed-size SIGNKEY bit signature.
+ *
+ * Non-leaf SIGNKEY entry whose signature has every bit set but which is not
+ * yet flagged ALLISTRUE: replaced by a header-only SIGNKEY|ALLISTRUE key.
+ *
+ * Any other entry is returned unchanged.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       /* one int4 slot per word; may shrink below after de-duplication */
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* free the detoasted copy, if PG_DETOAST_DATUM made one */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /* any byte that is not 0xff means the key stays as it is */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       /* every bit is set: store only the header with ALLISTRUE */
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * GiST decompress method: detoast the key.  When detoasting produced a new
+ * copy, wrap it in a freshly palloc'ed GISTENTRY; otherwise hand back the
+ * caller's entry untouched.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+   GISTENTRY  *retval;
+
+   /* same pointer back means nothing had to be detoasted */
+   if (key == (GISTTYPE *) DatumGetPointer(entry->key))
+       PG_RETURN_POINTER(entry);
+
+   retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+   gistentryinit(*retval, PointerGetDatum(key),
+                 entry->rel, entry->page,
+                 entry->offset, key->len, FALSE);
+
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * Half-open range [arrb, arre) over an int4 array; passed as the opaque
+ * 'checkval' to checkcondition_arr().
+ */
+typedef struct
+{
+   int4       *arrb;           /* first element */
+   int4       *arre;           /* one past the last element */
+}  CHKVAL;
+
+/*
+ * Binary search for val->val in the int4 range described by the CHKVAL
+ * passed as 'checkval'.  Returns true when the value is present.
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+   CHKVAL     *range = (CHKVAL *) checkval;
+   int4       *lo = range->arrb;
+   int4       *hi = range->arre;
+
+   /* the candidate, if present, always lies in [lo, hi) */
+   while (lo < hi)
+   {
+       int4       *mid = lo + (hi - lo) / 2;
+
+       if (*mid < val->val)
+           lo = mid + 1;
+       else if (*mid == val->val)
+           return (true);
+       else
+           hi = mid;
+   }
+
+   return (false);
+}
+
+/*
+ * Test whether the signature passed as 'checkval' has the bit that
+ * val->val hashes to.
+ */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+   BITVECP     sign = (BITVECP) checkval;
+
+   return GETBIT(sign, HASHVAL(val->val));
+}
+
+/*
+ * GiST consistent method: can the key in argument 0 match the query in
+ * argument 1?
+ *
+ * An empty query never matches.  Array keys are checked by binary search,
+ * signature keys bit by bit, and an ALLISTRUE signature always matches.
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(entry->key);
+
+   if (!query->size)
+       PG_RETURN_BOOL(false);
+
+   if (!ISSIGNKEY(key))
+   {                           /* only leaf pages */
+       CHKVAL      chkval;
+
+       chkval.arrb = GETARR(key);
+       chkval.arre = chkval.arrb + ARRNELEM(key);
+       PG_RETURN_BOOL(TS_execute(
+                              GETQUERY(query),
+                              (void *) &chkval, true,
+                              checkcondition_arr
+                              ));
+   }
+
+   if (ISALLTRUE(key))
+       PG_RETURN_BOOL(true);
+
+   PG_RETURN_BOOL(TS_execute(
+                          GETQUERY(query),
+                          (void *) GETSIGN(key), false,
+                          checkcondition_bit
+                          ));
+}
+
+/*
+ * OR the key 'add' into the signature 'sbase'.
+ *
+ * Returns 1 (leaving sbase untouched) when 'add' is an ALLISTRUE signature,
+ * 0 otherwise.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+   int4        i;
+   BITVECP     sadd;
+
+   if (!ISSIGNKEY(add))
+   {
+       /* array key: hash every element into the signature */
+       int4       *ptr = GETARR(add);
+
+       for (i = 0; i < ARRNELEM(add); i++)
+           HASH(sbase, ptr[i]);
+       return 0;
+   }
+
+   if (ISALLTRUE(add))
+       return 1;
+
+   sadd = GETSIGN(add);
+   LOOPBYTE(
+            sbase[i] |= sadd[i];
+   );
+   return 0;
+}
+
+
+/*
+ * GiST union method: build one signature key covering every entry in the
+ * vector passed as argument 0 and store its byte size through argument 1.
+ * As soon as one entry is ALLISTRUE the result is an ALLISTRUE key.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   int        *size = (int *) PG_GETARG_POINTER(1);
+   int4        nentries = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+   int4        flag = SIGNKEY;
+   int4        pos = 0;
+   int4        len;
+   BITVEC      base;
+   GISTTYPE   *result;
+
+   MemSet((void *) base, 0, sizeof(BITVEC));
+   while (pos < nentries)
+   {
+       if (unionkey(base, GETENTRY(entryvec, pos)))
+       {
+           flag |= ALLISTRUE;
+           break;
+       }
+       pos++;
+   }
+
+   len = CALCGTSIZE(flag, 0);
+   result = (GISTTYPE *) palloc(len);
+   result->len = len;
+   *size = result->len;
+   result->flag = flag;
+   /* an ALLISTRUE key carries no signature bytes */
+   if (!ISALLTRUE(result))
+       memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * GiST same method: store in *result (argument 2) whether keys a and b are
+ * equal, and return that pointer.  The kind of key is decided from 'a'
+ * alone; b is assumed to be of the same kind.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+   GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+   GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+   bool       *result = (bool *) PG_GETARG_POINTER(2);
+   int4        i;
+
+   if (!ISSIGNKEY(a))
+   {                           /* a and b ISARRKEY */
+       int4        lena = ARRNELEM(a),
+                   lenb = ARRNELEM(b);
+       int4       *ptra = GETARR(a),
+                  *ptrb = GETARR(b);
+
+       if (lena != lenb)
+           *result = false;
+       else
+       {
+           *result = true;
+           for (i = 0; i < lena; i++)
+           {
+               if (ptra[i] == ptrb[i])
+                   continue;
+               *result = false;
+               break;
+           }
+       }
+   }
+   else if (ISALLTRUE(a) || ISALLTRUE(b))
+   {                           /* then b also ISSIGNKEY */
+       /* equal only when both are ALLISTRUE */
+       if (ISALLTRUE(a) && ISALLTRUE(b))
+           *result = true;
+       else
+           *result = false;
+   }
+   else
+   {
+       BITVECP     sa = GETSIGN(a),
+                   sb = GETSIGN(b);
+
+       *result = true;
+       LOOPBYTE(
+           if (sa[i] != sb[i])
+           {
+               *result = false;
+               break;
+           }
+       );
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * Count the set bits in a signature, one byte at a time.
+ */
+static int4
+sizebitvec(BITVECP sign)
+{
+   int4        nbits = 0,
+               i;
+
+   LOOPBYTE(
+       nbits += SUMBIT(sign[i]);
+   );
+   return nbits;
+}
+
+/*
+ * Hamming distance between two signatures: the number of bit positions in
+ * which a and b differ.
+ */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+   int i;
+   int differing = 0;
+
+   LOOPBIT(
+       differing += ( GETBIT(a,i) != GETBIT(b,i) ) ? 1 : 0;
+   );
+   return differing;
+}
+
+/*
+ * Hamming distance between two signature keys.  An ALLISTRUE key is
+ * treated as a signature with every bit set, so its distance to an ordinary
+ * key is the number of clear bits in that key.
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+   if ( ISALLTRUE(a) && ISALLTRUE(b) )
+       return 0;
+   if ( ISALLTRUE(a) )
+       return SIGLENBIT-sizebitvec(GETSIGN(b));
+   if ( ISALLTRUE(b) )
+       return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+   return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * GiST penalty method: store in *penalty (argument 2) the cost of adding
+ * the new entry (argument 1) under the existing entry (argument 0), and
+ * return that pointer.  The cost is a Hamming distance, except for an array
+ * key measured against an ALLISTRUE key, where it is scaled below 1.
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+   GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+   float      *penalty = (float *) PG_GETARG_POINTER(2);
+   GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+   GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+
+   if (!ISARRKEY(newval))
+       *penalty=hemdist(origval,newval);
+   else
+   {
+       BITVEC sign;
+
+       /* array keys are compared through a temporary signature */
+       makesign(sign, newval);
+       if ( ISALLTRUE(origval) )
+           *penalty=((float)(SIGLENBIT-sizebitvec(sign)))/(float)(SIGLENBIT+1);
+       else
+           *penalty=hemdistsign(sign,GETSIGN(origval));
+   }
+   PG_RETURN_POINTER(penalty);
+}
+
+/*
+ * Signature form of one entry, precomputed by fillcache() for use during
+ * picksplit.
+ */
+typedef struct
+{
+   bool        allistrue;      /* entry was an ALLISTRUE key; 'sign' is not filled */
+   BITVEC      sign;           /* raw signature bytes (no GISTTYPE header) */
+}  CACHESIGN;
+
+/*
+ * Fill one cache slot from a key: array keys are hashed into a signature,
+ * ordinary signature keys are copied, and ALLISTRUE keys only set the flag.
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+   if (ISARRKEY(key))
+   {
+       item->allistrue = false;
+       makesign(item->sign, key);
+   }
+   else if (ISALLTRUE(key))
+   {
+       item->allistrue = true;
+   }
+   else
+   {
+       item->allistrue = false;
+       memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+   }
+}
+
+/* Balancing term used by picksplit: -((a-b)^3) * c, computed as a double. */
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+/* One picksplit candidate: entry offset plus the cost it is sorted by. */
+typedef struct
+{
+   OffsetNumber pos;
+   int4        cost;
+} SPLITCOST;
+
+/*
+ * qsort() comparator: orders SPLITCOST items by ascending cost.
+ */
+static int
+comparecost(const void *a, const void *b)
+{
+   int4        lhs = ((SPLITCOST *) a)->cost;
+   int4        rhs = ((SPLITCOST *) b)->cost;
+
+   if (lhs > rhs)
+       return 1;
+   if (lhs < rhs)
+       return -1;
+   return 0;
+}
+
+
+/*
+ * Hamming distance between two cached signatures; an allistrue slot counts
+ * as a signature with every bit set.
+ */
+static int
+hemdistcache(CACHESIGN   *a, CACHESIGN   *b) {
+   if ( a->allistrue && b->allistrue )
+       return 0;
+   if ( a->allistrue )
+       return SIGLENBIT-sizebitvec(b->sign);
+   if ( b->allistrue )
+       return SIGLENBIT-sizebitvec(a->sign);
+
+   return hemdistsign( a->sign, b->sign );
+}
+
+/*
+ * GiST picksplit method.
+ *
+ * Argument 0 is the entry vector, argument 1 the GIST_SPLITVEC to fill in;
+ * the same GIST_SPLITVEC pointer is returned.
+ *
+ * Steps:
+ *  1. convert entries to signatures (cache) and pick as seeds the pair with
+ *     the largest Hamming distance (the last entry is not a seed candidate);
+ *  2. start the left/right union keys from the two seeds;
+ *  3. sort all entries by |distance to seed 1 - distance to seed 2|;
+ *  4. hand each entry to the side whose current union it is closer to, with
+ *     WISH_F() biasing the choice toward the smaller side, and OR its
+ *     signature into that side's union.
+ *
+ * cache[j].sign is a raw BITVEC, so it is passed to sizebitvec() directly;
+ * GETSIGN() must only be applied to GISTTYPE keys (it skips their header).
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+   OffsetNumber k,
+               j;
+   GISTTYPE   *datum_l,
+              *datum_r;
+   BITVECP     union_l,
+               union_r;
+   int4        size_alpha,
+               size_beta;
+   int4        size_waste,
+               waste = -1;
+   int4        nbytes;
+   OffsetNumber seed_1 = 0,
+               seed_2 = 0;
+   OffsetNumber *left,
+              *right;
+   OffsetNumber maxoff;
+   BITVECP     ptr;
+   int         i;
+   CACHESIGN  *cache;
+   SPLITCOST  *costvector;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+   cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+   fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+   /* seed selection; the cache is filled lazily during the first outer pass */
+   for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+       for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+           if (k == FirstOffsetNumber)
+               fillcache(&cache[j], GETENTRY(entryvec, j));
+
+           size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+           if (size_waste > waste) {
+               waste = size_waste;
+               seed_1 = k;
+               seed_2 = j;
+           }
+       }
+   }
+
+   left = v->spl_left;
+   v->spl_nleft = 0;
+   right = v->spl_right;
+   v->spl_nright = 0;
+
+   /* no pair was examined: fall back to the first two entries */
+   if (seed_1 == 0 || seed_2 == 0) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
+   /* form initial .. */
+   if (cache[seed_1].allistrue) {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_l->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_l->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+   }
+   if (cache[seed_2].allistrue) {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_r->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_r->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+   }
+
+   union_l=GETSIGN(datum_l);
+   union_r=GETSIGN(datum_r);
+   /* from here on the last entry takes part as well */
+   maxoff = OffsetNumberNext(maxoff);
+   fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+   /* sort before ... */
+   costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+   for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+       costvector[j - 1].pos = j;
+       size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+       size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+       costvector[j - 1].cost = abs(size_alpha - size_beta);
+   }
+   qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+   for (k = 0; k < maxoff; k++) {
+       j = costvector[k].pos;
+       /* the seeds go to their own side unconditionally */
+       if (j == seed_1) {
+           *left++ = j;
+           v->spl_nleft++;
+           continue;
+       } else if (j == seed_2) {
+           *right++ = j;
+           v->spl_nright++;
+           continue;
+       }
+
+       /* distance from entry j to the current left union */
+       if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+               size_alpha=0;
+           else
+               size_alpha = SIGLENBIT-sizebitvec(  
+                   ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign  
+               );
+       } else {
+           size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+       }
+
+       /* distance from entry j to the current right union */
+       if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+               size_beta=0;
+           else
+               size_beta = SIGLENBIT-sizebitvec(  
+                   ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign  
+               );
+       } else {
+           size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+       }
+
+       if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+           if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+               /* union becomes "all bits set" unless it already is ALLISTRUE */
+               if (! ISALLTRUE(datum_l) )
+                   MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_l[i] |= ptr[i];
+               );
+           }
+           *left++ = j;
+           v->spl_nleft++;
+       } else {
+           if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_r) )
+                   MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_r[i] |= ptr[i];
+               );
+           }
+           *right++ = j;
+           v->spl_nright++;
+       }
+   }
+
+   /* terminate both offset lists */
+   *right = *left = FirstOffsetNumber;
+   pfree(costvector);
+   pfree(cache);
+   v->spl_ldatum = PointerGetDatum(datum_l);
+   v->spl_rdatum = PointerGetDatum(datum_r);
+
+   PG_RETURN_POINTER(v);
+}


diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+#define BITBYTE 8              /* bits per signature byte */
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )  /* signature length in bytes */
+#define SIGLENBIT (SIGLEN*BITBYTE)         /* signature length in bits */
+
+/* A signature is a fixed-size byte array; BITVECP points at its first byte. */
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+#define LOOPBYTE(a) \
+       for(i=0;i
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i
+                               a;\
+               }
+
+/* Byte of signature x that holds bit number i. */
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+/* Bit i (0..7) of the single byte x; i is parenthesized so any expression works. */
+#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 )
+/* Clear / set / read bit number i of signature x. */
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+/* Map a value to its bit position in the signature, and set that bit. */
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ *
+ * 'data' is a variable-length payload that follows the two-int4 header:
+ * an int4 array for ARRKEY keys, a BITVEC for SIGNKEY keys, and nothing
+ * for SIGNKEY|ALLISTRUE keys (see CALCGTSIZE).
+ */
+typedef struct
+{
+   int4        len;            /* total size of the key in bytes, header included */
+   int4        flag;           /* combination of ARRKEY / SIGNKEY / ALLISTRUE */
+   char        data[1];
+}  GISTTYPE;
+
+/* Key kinds stored in GISTTYPE.flag. */
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+/* Flag tests; the argument is parenthesized so any pointer expression works. */
+#define ISARRKEY(x) ( ((GISTTYPE*)(x))->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)(x))->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)(x))->flag & ALLISTRUE )
+
+/* Size of the len+flag header, and total key size for a given kind. */
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+/* Payload accessors: x must point at a GISTTYPE, not at a bare signature. */
+#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)(x)+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)(x))->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <regex.h>
+
+#include "postgres.h"
+
+#include "spell.h"
+
+/* Scratch buffers for rewritten words are sized 2*MAXNORMLEN in this file. */
+#define MAXNORMLEN 56
+
+/* Case-insensitive "does x start with y": compares only strlen(y) characters. */
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/* qsort() comparator: orders SPELL entries by their word, using strcmp(). */
+static int cmpspell(const void *s1,const void *s2){
+   const SPELL *left = (const SPELL*)s1;
+   const SPELL *right = (const SPELL*)s2;
+
+   return(strcmp(left->word,right->word));
+}
+
+/* Lower-case a NUL-terminated string in place, byte by byte, via tolower(). */
+static void 
+strlower( char * str ) {
+   unsigned char *cur;
+
+   for ( cur = (unsigned char *)str; *cur; cur++ )
+       *cur = tolower( *cur );
+}
+
+/*
+ * Backward string compare for suffix tree operations: compares s1 and s2
+ * from their last characters toward the first.  Returns -1, 0 or 1; when
+ * one string is a suffix of the other, the shorter one sorts first.
+ */
+static int 
+strbcmp(const char *s1, const char *s2) { 
+   int rest1 = strlen(s1), rest2 = strlen(s2);
+
+   while (rest1 > 0 && rest2 > 0) {
+       char c1 = s1[--rest1];
+       char c2 = s2[--rest2];
+
+       if (c1 != c2)
+           return (c1 < c2) ? -1 : 1;
+   }
+   if (rest1 == rest2)
+       return 0;
+   return (rest1 < rest2) ? -1 : 1;
+}
+/*
+ * Like strbcmp(), but compares at most 'count' trailing characters.
+ * Returns 0 as soon as 'count' characters have matched.
+ */
+static int 
+strbncmp(const char *s1, const char *s2, size_t count) { 
+   int rest1 = strlen(s1), rest2 = strlen(s2), budget = count;
+
+   while (rest1 > 0 && rest2 > 0 && budget > 0) {
+       char c1 = s1[--rest1];
+       char c2 = s2[--rest2];
+
+       if (c1 != c2)
+           return (c1 < c2) ? -1 : 1;
+       budget--;
+   }
+   if (budget == 0 || rest1 == rest2)
+       return 0;
+   return (rest1 < rest2) ? -1 : 1;
+}
+
+/*
+ * qsort() comparator for AFFIX entries: first by type, then by the repl
+ * string -- forward (strcmp) for type 'p', backward (strbcmp) otherwise.
+ */
+static int 
+cmpaffix(const void *s1,const void *s2){
+   const AFFIX *left = (const AFFIX*)s1;
+   const AFFIX *right = (const AFFIX*)s2;
+
+   if (left->type != right->type)
+       return (left->type < right->type) ? -1 : 1;
+   if (left->type != 'p')
+       return(strbcmp(left->repl,right->repl));
+   return(strcmp(left->repl,right->repl));
+}
+
+/*
+ * Append one dictionary word and its affix-flag string to Conf->Spell,
+ * growing the array in steps of 1024*20 entries.
+ *
+ * The word is duplicated with strdup(); at most 9 characters of 'flag' are
+ * kept so that the stored flag string is always NUL-terminated (FindWord()
+ * runs strchr() on it).  Raises ERROR on allocation failure; otherwise
+ * returns 0.
+ */
+int 
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell"); 
+   }
+   Conf->Spell[Conf->nspell].word=strdup(word);
+   if ( !Conf->Spell[Conf->nspell].word ) 
+       elog(ERROR,"No memory for AddSpell");
+   strncpy(Conf->Spell[Conf->nspell].flag,flag,10);
+   /* strncpy() writes no terminator when flag has 10 or more characters */
+   Conf->Spell[Conf->nspell].flag[9]='\0';
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * Load an ispell-style word list: one entry per line, optionally followed
+ * by '/' and a run of ASCII letters used as affix flags.  Each word is
+ * lower-cased, stripped of CR/LF and passed to AddSpell().
+ *
+ * Returns 1 when the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportDictionary(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];  
+   FILE *dict;
+
+   if(!(dict=fopen(filename,"r")))return(1);
+   while(fgets(str,sizeof(str),dict)){
+       unsigned char *s;
+       const unsigned char *flag;
+
+           flag = NULL;
+       /* split "word/flags": cut at '/', then end flags at first non-letter */
+       if((s=strchr(str,'/'))){
+           *s=0;
+           s++;flag=s;
+           while(*s){
+               if (((*s>='A')&&(*s<='Z'))||((*s>='a')&&(*s<='z')))
+                   s++;
+               else {
+                   *s=0;
+                   break;
+               }
+           }
+       }else{
+           flag="";
+       }
+       /* only the word part is lower-cased; str now ends where '/' was */
+       strlower(str);
+       /* NOTE(review): the two comment lines below describe a filter that this loop does not implement */
+       /* Dont load words if first letter is not required */
+       /* It allows to optimize loading at  search time   */
+       s=str;
+       while(*s){
+           if(*s=='\r')*s=0;
+           if(*s=='\n')*s=0;
+           s++;
+       }
+       AddSpell(Conf,str,flag);
+   }
+   fclose(dict);
+   return(0);
+}
+
+
+/*
+ * Look up 'word' in the sorted Conf->Spell array.
+ *
+ * The search is limited to the [Left, Right] index range recorded in
+ * Conf->SpellTree for the word's first byte.  When affixflag is non-zero an
+ * entry only matches if its flag string contains that character.
+ *
+ * Returns the matching entry, or NULL when there is none.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int l,c,r,resc,resl,resr, i;
+
+   i = (int)(*word) & 255;
+   l = Conf->SpellTree.Left[i];
+   r = Conf->SpellTree.Right[i];
+   /* no dictionary word starts with this byte */
+   if (l == -1) return (NULL);
+   while(l<=r){
+       /* each round probes the middle and both ends of the range */
+       c = (l + r) >> 1;
+       resc = strcmp(Conf->Spell[c].word, word);
+       if( (resc == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[c]);
+       }
+       resl = strcmp(Conf->Spell[l].word, word);
+       if( (resl == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[l]);
+       }
+       resr = strcmp(Conf->Spell[r].word, word);
+       if( (resr == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[r]);
+       }
+       /* bisect on the middle probe; both end probes are always dropped */
+       if(resc < 0){
+           l = c + 1;
+           r--;
+       } else if(resc > 0){
+           r = c - 1;
+           l++;
+       } else {
+           /* same word but wrong flag at c: narrow from both ends */
+           l++;
+           r--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * Append one affix rule to Conf->Affix, growing the array 16 entries at a
+ * time.
+ *
+ * 'mask' is stored as a regular expression anchored with '$' at the end
+ * for type 's' and with '^' at the start otherwise; it is compiled lazily
+ * (compile = 1).  'find' and 'repl' are copied as given.
+ *
+ * The mask/find/repl fields are fixed-size arrays inside AFFIX, so a rule
+ * that does not fit is rejected with ERROR instead of overrunning them.
+ * Also raises ERROR on allocation failure; otherwise returns 0.
+ */
+int 
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   if(Conf->naffixes>=Conf->maffixes){
+       if(Conf->maffixes){
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }else{
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL ) 
+           elog(ERROR,"No memory for AddAffix");
+   }
+   /* mask needs room for one anchor character plus the terminating NUL */
+   if ( strlen(mask)+2 > sizeof(Conf->Affix[Conf->naffixes].mask) ||
+        strlen(find)+1 > sizeof(Conf->Affix[Conf->naffixes].find) ||
+        strlen(repl)+1 > sizeof(Conf->Affix[Conf->naffixes].repl) )
+       elog(ERROR,"Affix rule is too long");
+   if (type=='s') {
+       sprintf(Conf->Affix[Conf->naffixes].mask,"%s$",mask);
+   } else {
+       sprintf(Conf->Affix[Conf->naffixes].mask,"^%s",mask);
+   }
+   Conf->Affix[Conf->naffixes].compile = 1;
+   Conf->Affix[Conf->naffixes].flag=flag;
+   Conf->Affix[Conf->naffixes].type=type;
+   
+   strcpy(Conf->Affix[Conf->naffixes].find,find);
+   strcpy(Conf->Affix[Conf->naffixes].repl,repl);
+   Conf->Affix[Conf->naffixes].replen=strlen(repl);
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy src to dist, dropping every space, '-' and tab.  dist must be large
+ * enough for the result; returns dist.
+ */
+static char * 
+remove_spaces(char *dist,char *src){
+   char *out = dist;
+
+   for ( ; *src; src++ ) {
+       if ( *src==' ' || *src=='-' || *src=='\t' )
+           continue;
+       *out++ = *src;
+   }
+   *out=0;
+   return(dist);
+}
+
+
+/*
+ * Load an ispell-style affix file.
+ *
+ * Lines starting with "suffixes" / "prefixes" switch the current section,
+ * lines starting with "flag " set the flag character for the rules that
+ * follow, '#' starts a comment, and the remaining lines inside a section
+ * are parsed as "mask > find , repl" (or "mask > repl") and handed to
+ * AddAffix() after lower-casing and removal of spaces, '-' and tabs.
+ *
+ * Returns 1 when the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           /*
+            * strchr() also matches the terminating NUL of its first
+            * argument, so stop explicitly at the end of the line.
+            */
+           while(*s && strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* str is reused as scratch space from here on */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               /* "mask > repl": the single field after '>' is the replacement */
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+       
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+       
+   }
+   fclose(affix);
+       
+   return(0);
+}
+
+/*
+ * Sort Conf->Spell by word and record, for every possible first byte, the
+ * index of the first (Left) and last (Right) entry starting with it.
+ * Left stays -1 for bytes that start no word.
+ */
+void 
+SortDictionary(IspellDict * Conf){
+   int     prevLet = -1;
+   int     Let;
+   size_t  idx;
+
+   qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+   idx = 0;
+   while (idx < 256) {
+       Conf->SpellTree.Left[idx] = -1;
+       idx++;
+   }
+
+   for(idx = 0; idx < Conf->nspell; idx++) {
+       Let = (int)(*(Conf->Spell[idx].word)) & 255;
+       /* first entry of a new run of words sharing this first byte */
+       if (prevLet != Let) {
+           prevLet = Let;
+           Conf->SpellTree.Left[Let] = idx;
+       }
+       Conf->SpellTree.Right[Let] = idx;
+   }
+}
+
+/*
+ * Sort Conf->Affix with cmpaffix() and build the per-byte index ranges:
+ * PrefixTree is keyed on the first byte of repl for type 'p' rules,
+ * SuffixTree on the last byte of repl (0 when repl is empty) for all others.
+ * Unused slots hold -1.
+ */
+void 
+SortAffixes(IspellDict * Conf) {
+  int   prevPrefixLet = -1, prevSuffixLet = -1, Let;
+  AFFIX *rule; size_t idx;
+
+  if (Conf->naffixes > 1)
+    qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
+
+  for(idx = 0; idx < 256; idx++) {
+    Conf->PrefixTree.Left[idx] = Conf->PrefixTree.Right[idx] = -1;
+    Conf->SuffixTree.Left[idx] = Conf->SuffixTree.Right[idx] = -1;
+  }
+
+  for(idx = 0; idx < Conf->naffixes; idx++) {
+    rule = &(((AFFIX*)Conf->Affix)[idx]);
+
+    if(rule->type != 'p') {
+      Let = (rule->replen) ? (int)(rule->repl[rule->replen-1]) & 255 : 0;
+      if (prevSuffixLet != Let) {
+        prevSuffixLet = Let;
+        Conf->SuffixTree.Left[Let] = idx;
+      }
+      Conf->SuffixTree.Right[Let] = idx;
+      continue;
+    }
+
+    Let = (int)(*(rule->repl)) & 255;
+    if (prevPrefixLet != Let) {
+      prevPrefixLet = Let;
+      Conf->PrefixTree.Left[Let] = idx;
+    }
+    Conf->PrefixTree.Right[Let] = idx;
+  }
+}
+
+/*
+ * Try to undo one suffix rule on 'word' (of length 'len').
+ *
+ * *res receives the result of strbncmp() on the word and the rule's 'repl'
+ * text (presumably a comparison from the end of the strings -- strbncmp is
+ * defined elsewhere); a non-zero value means the rule does not apply.
+ * Otherwise the last 'replen' bytes of the word are replaced by 'find', the
+ * result is matched against the rule's mask (compiled on first use) and
+ * looked up in the dictionary with the rule's flag.
+ *
+ * Returns a pstrdup'ed copy of the restored word, or NULL.
+ */
+static char * 
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) {
+   regmatch_t subs[2]; /* workaround for apache&linux */
+   char    stem[2*MAXNORMLEN] = "";
+   int     rc;
+
+   *res = strbncmp(word, Affix->repl, Affix->replen);
+   if (*res != 0)
+       return NULL;
+
+   /* copy the word, then overwrite its ending with the original suffix */
+   strcpy(stem, word);
+   strcpy(stem + len - Affix->replen, Affix->find);
+
+   /* the mask is compiled lazily; 'compile' is cleared once that is done */
+   if (Affix->compile) {
+       rc = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+       if (rc) {
+           regfree(&(Affix->reg));
+           return(NULL);
+       }
+       Affix->compile = 0;
+   }
+
+   rc = regexec(&(Affix->reg),stem,1,subs,0);
+   if (rc != 0)
+       return NULL;
+   if (!FindWord(Conf, stem, Affix->flag))
+       return NULL;
+   return pstrdup(stem);
+}
+
+#define NS 1
+#define MAX_NORM 512
+/*
+ * Try to undo one prefix rule on 'word' and collect the resulting forms.
+ *
+ * Returns the strncmp() result of the word against the rule's 'repl' text
+ * when the rule does not apply, and 0 in every other case.  When the rule
+ * applies and its mask matches, the restored word is appended to the
+ * NULL-terminated list ('forms' is its start, '*cur' its current end) if
+ * the dictionary knows it, and the first suffix rule of bucket 'pi' is
+ * additionally tried on the restored word.  The list is never grown past
+ * MAX_NORM-1 entries.
+ */
+static int 
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur ) {
+   regmatch_t  match[NS*2];
+   char        candidate[2*MAXNORMLEN] = "";
+   int         rc, cmp, sufidx, sufres;
+   size_t      candlen;
+   AFFIX      *affixes = Conf->Affix;
+
+   cmp = strncmp(word, Affix->repl, Affix->replen);
+   if (cmp != 0)
+       return cmp;
+
+   /* put the original prefix back in front of the rest of the word */
+   strcpy(candidate, Affix->find);
+   strcat(candidate, word + Affix->replen);
+
+   /* the mask is compiled lazily; 'compile' is cleared once that is done */
+   if (Affix->compile) {
+       rc = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+       if (rc) {
+           regfree(&(Affix->reg));
+           return (0);
+       }
+       Affix->compile = 0;
+   }
+
+   rc = regexec(&(Affix->reg),candidate,1,match,0);
+   if (rc != 0)
+       return 0;
+
+   /* the restored word itself */
+   if (FindWord(Conf, candidate, Affix->flag) != NULL &&
+       (*cur - forms) < (MAX_NORM-1)) {
+       **cur = pstrdup(candidate);
+       (*cur)++;
+       **cur = NULL;
+   }
+
+   /* prefix and suffix together */
+   candlen = strlen(candidate);
+   sufidx = Conf->SuffixTree.Left[pi];
+   if (sufidx >= 0 && (*cur - forms) < (MAX_NORM-1)) {
+       **cur = CheckSuffix(candidate, candlen, &affixes[sufidx], &sufres, Conf);
+       if (**cur) {
+           (*cur)++;
+           **cur = NULL;
+       }
+   }
+   return 0;
+}
+
+
+/*
+ * Collect the normal (dictionary) forms of 'word'.
+ *
+ * 'word' is passed through strlower() in place.  The result is a palloc'd,
+ * NULL-terminated array of at most MAX_NORM-1 palloc'd strings: the word
+ * itself when the dictionary contains it, plus every form produced by the
+ * prefix and suffix rules.  Returns NULL when nothing was found, when the
+ * word is empty, or when it is longer than MAXNORMLEN.
+ */
+char ** 
+NormalizeWord(IspellDict * Conf,char *word){
+   size_t  len;
+   char  **forms;
+   char  **cur;
+   AFFIX  *Affix;
+   int     ri, pi, ipi, lp, rp, cp, ls, rs;
+   int     lres, rres, cres = 0;
+
+   len=strlen(word);
+   /*
+    * The empty word is rejected too: it has no last byte, so the
+    * word[strlen(word)-1] access below would read in front of the buffer,
+    * and a zero 'pi' would make the "ipi += pi" loop below never end.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return(NULL);
+
+   strlower(word);
+
+   forms=(char **) palloc(MAX_NORM*sizeof(char *));
+   cur=forms;*cur=NULL;
+
+   ri = (int)(*word) & 255;                    /* first byte: prefix bucket */
+   pi = (int)(word[strlen(word)-1]) & 255;     /* last byte: suffix bucket */
+   Affix=(AFFIX*)Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if(FindWord(Conf, word, 0)){
+       *cur=pstrdup(word);
+       cur++;*cur=NULL;
+   }
+
+   /* Find all other NORMAL forms of the 'word' */
+
+   /*
+    * Two passes: suffix bucket 0 (rules with an empty replacement, see
+    * SortAffixes) and then the bucket of the word's last byte.
+    */
+   for (ipi = 0; ipi <= pi; ipi += pi) {
+
+       /* check prefix */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp) {
+           cp = (lp + rp) >> 1;
+           cres = 0;
+           if ((cur - forms) < (MAX_NORM-1)) {
+               cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+           }
+           if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+           }
+           if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+           }
+           /* narrow the range around the middle rule, always shrinking both ends */
+           if (cres < 0) {
+               rp = cp - 1;
+               lp++;
+           } else if (cres > 0) {
+               lp = cp + 1;
+               rp--;
+           } else {
+               lp++;
+               rp--;
+           }
+       }
+
+       /* check suffix: walk the bucket from both ends towards the middle */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       while (ls >= 0 && ls <= rs) {
+           if (  ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           ls++;
+           rs--;
+       } /* end while */
+
+   } /* for ipi */
+
+   if(cur==forms){
+       pfree(forms);
+       return(NULL);
+   }
+   return(forms);
+}
+
+/*
+ * Release everything owned by the dictionary and zero the structure.
+ *
+ * Compiled affix masks are released with regfree(); 'compile == 0' marks an
+ * affix whose mask has been compiled (see CheckSuffix/CheckPrefix).
+ */
+void 
+FreeIspell (IspellDict *Conf) {
+   int i;
+   AFFIX *Affix = (AFFIX *)Conf->Affix;
+
+   for (i = 0; i < Conf->naffixes; i++) {
+       if (Affix[i].compile == 0) {
+           regfree(&(Affix[i].reg));
+       }
+   }
+   /* the word list has nspell entries; it must not be walked with naffixes */
+   for (i = 0; i < Conf->nspell; i++) {
+       free( Conf->Spell[i].word );
+   }
+   free(Conf->Affix);
+   free(Conf->Spell);
+   memset( (void*)Conf, 0, sizeof(IspellDict) );
+   return;
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include <sys/types.h>
+#include <regex.h>
+
+/* One dictionary entry. */
+typedef struct spell_struct {
+        /* the word itself; released with free() by FreeIspell() */
+        char * word; 
+        /* flags attached to the word -- presumably the affix flags it accepts; verify against AddSpell() */
+        char flag[10];
+} SPELL;
+
+/* One affix (prefix or suffix) rule. */
+typedef struct aff_struct {   
+        /* flag of the rule; passed to FindWord() when a restored word is looked up */
+        char flag;
+        /* 'p' for a prefix rule; anything else is handled as a suffix rule */
+        char type;
+        /* regular expression source the restored word must match */
+        char mask[33];
+        /* text put back into the word when the rule is undone */
+        char find[16];
+        /* text the rule leaves in the inflected word (compared against the word) */
+        char repl[16];
+        /* compiled form of 'mask'; valid only once 'compile' is 0 */
+        regex_t reg;
+        /* number of bytes of 'repl' that are compared -- presumably strlen(repl); confirm in AddAffix() */
+        size_t replen;
+        /* non-zero while 'mask' still has to be compiled into 'reg' */
+        char compile;
+} AFFIX;
+
+/*
+ * Per-byte index into a sorted table: for byte value c, Left[c] and
+ * Right[c] are the first and last table positions of its bucket.
+ * -1 marks an empty bucket.
+ */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+/* A loaded ispell dictionary: affix rules, word list and their indexes. */
+typedef struct {
+   /* presumably the allocated capacity of Affix[] -- confirm in AddAffix() */
+   int maffixes;
+   /* number of rules stored in Affix[] */
+   int naffixes;
+   AFFIX * Affix;
+
+   /* number of entries stored in Spell[] */
+   int nspell;
+   /* presumably the allocated capacity of Spell[] -- confirm in AddSpell() */
+   int mspell;
+   SPELL   *Spell;
+   /* index over Spell[] by first byte of the word, built by SortDictionary() */
+   Tree_struct SpellTree;
+   /* indexes over Affix[] built by SortAffixes() */
+   Tree_struct PrefixTree;
+   Tree_struct SuffixTree;
+
+} IspellDict;
+
+/* returns a NULL-terminated array of normal forms of 'word', or NULL */
+char ** NormalizeWord(IspellDict * Conf,char *word);
+/* loaders for the affix file and the word list */
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+/* sort the loaded tables and build their per-byte indexes */
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+/* release everything owned by the dictionary */
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+/* states of the key=value parser in parse_cfgdict() */
+/* skipping whitespace before a key */
+#define CS_WAITKEY 0
+/* inside a key */
+#define CS_INKEY   1
+/* key finished, waiting for '=' */
+#define CS_WAITEQ  2
+/* '=' seen, waiting for the start of the value */
+#define CS_WAITVALUE   3
+/* inside a double-quoted value */
+#define CS_INVALUE 4
+/* inside an unquoted value */
+#define CS_IN2VALUE    5
+/* value finished, waiting for ',' */
+#define CS_WAITDELIM   6
+/* character after a backslash inside a quoted value */
+#define CS_INESC   7
+/* character after a backslash inside an unquoted value */
+#define CS_IN2ESC  8
+
+/*
+ * Return a palloc'd, NUL-terminated copy of the first 'len' bytes of 'ptr'
+ * with backslash escapes removed: a backslash is dropped and the byte
+ * after it is kept literally.  A backslash that is the very last byte has
+ * nothing to escape and is kept as is.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+   char *res=palloc(len+1), *cptr;
+   memcpy(res,ptr,len);
+   res[len]='\0';
+   /* unescape in place: 'ptr' reads, 'cptr' writes, both inside 'res' */
+   cptr = ptr = res;
+   while(*ptr) {
+       /*
+        * Skip the backslash only when a byte follows it; otherwise the
+        * reader would step over the terminator and run off the buffer.
+        */
+       if ( *ptr == '\\' && ptr[1] != '\0' )
+           ptr++;
+       *cptr=*ptr; ptr++; cptr++;
+   }
+   *cptr='\0';
+
+   return res;
+}
+
+/*
+ * Parse a dictionary option string of the form
+ *     key = value, key = "quoted value", ...
+ * into a palloc'd array of Map entries returned through *m.
+ *
+ * Keys consist of alphabetic characters.  A value is either enclosed in
+ * double quotes or runs up to the next whitespace or comma; a backslash
+ * escapes the following character in both kinds of value.  The array is
+ * zero-filled and has one spare entry, so the entry after the last parsed
+ * pair has NULL members.  Syntax errors are reported with elog(ERROR).
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+   Map *mptr;
+   char *ptr=VARDATA(in), *begin=NULL;
+   int num=0;      /* an int: a char counter overflows on long inputs */
+   int state=CS_WAITKEY;
+
+   /* pairs are separated by commas, so commas+1 bounds the number of pairs */
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if ( *ptr==',' ) num++;
+       ptr++;
+   }
+
+   *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+   memset(mptr, 0, sizeof(Map)*(num+2) );
+   ptr=VARDATA(in);
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       /* the ctype functions need an unsigned char value */
+       unsigned char c = (unsigned char) *ptr;
+
+       if (state==CS_WAITKEY) {
+           if (isalpha(c)) {
+               begin=ptr;
+               state=CS_INKEY;
+           } else if ( !isspace(c) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if (state==CS_INKEY) {
+           if ( isspace(c) ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITEQ;
+           } else if ( *ptr=='=' ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITVALUE;
+           } else if ( !isalpha(c) ) 
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITEQ ) {
+           if ( *ptr=='=' )
+               state=CS_WAITVALUE;
+           else if ( !isspace(c) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITVALUE ) {
+           if ( *ptr=='"' ) {
+               begin=ptr+1;
+               state=CS_INVALUE;
+           } else if ( !isspace(c) ) {
+               begin=ptr;
+               state=CS_IN2VALUE;
+           }
+       } else if ( state==CS_INVALUE ) {
+           if ( *ptr=='"' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_INESC;
+       } else if ( state==CS_IN2VALUE ) {
+           if ( isspace(c) || *ptr==',' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               /* must come back to the unquoted state, not the quoted one */
+               state=CS_IN2ESC;
+       } else if ( state==CS_WAITDELIM ) {
+           if ( *ptr==',' ) 
+               state=CS_WAITKEY; 
+           else if ( !isspace(c) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state == CS_INESC ) {
+           /* the escaped character is taken as is */
+           state=CS_INVALUE;
+       } else if ( state == CS_IN2ESC ) {
+           state=CS_IN2VALUE;
+       } else 
+           elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int) (ptr-VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may be ended by the end of the input */
+   if (state==CS_IN2VALUE) {
+       mptr->value = nstrdup(begin, ptr-begin);
+       mptr++;
+   } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) ) 
+       elog(ERROR,"Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery input is parsed with the text parser
+ * and morphology is applied to it.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored in a plain (flat) view: in the array of nodes
+ * the right child is always the next item and the left child is at
+ * item + item->left.
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+/* SQL-callable functions defined in this file */
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+/* token kinds returned by gettoken_query(); VAL and OPR are also item types */
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR   2
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ */
+typedef struct NODE
+{
+   /* weight bitmask of a value, as produced by get_weight() */
+   int2        weight;
+   /* item kind: VAL, OPR, VALTRUE, ... */
+   int2        type;
+   /* operator character for OPR; crc32 of the operand text for values */
+   int4        val;
+   /* offset of the operand text inside the parser's operand buffer */
+   int2        distance;
+   /* length of the operand text */
+   int2        length;
+   /* node pushed before this one (the list grows at its head) */
+   struct NODE *next;
+}  NODE;
+
+/* Working state of the query parser. */
+typedef struct
+{
+   /* current position in the query string */
+   char       *buf;
+   /* WAITOPERAND or WAITOPERATOR */
+   int4        state;
+   /* number of currently open parentheses */
+   int4        count;
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   /* allocated size of op */
+   int4        lenop;
+   /* bytes of op in use, terminators included */
+   int4        sumlen;
+   /* start of the buffer holding the NUL-separated operand texts */
+   char       *op;
+   /* position in op where the next operand will be written */
+   char       *curop;
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional weight suffix ":[ABCDabcd]*" at 'buf'.
+ *
+ * *weight is set to a bitmask: A = 1<<3, B = 1<<2, C = 1<<1, D = 1, or to
+ * 0 when 'buf' does not start with ':'.  Returns the position of the first
+ * character that does not belong to the suffix.
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+   *weight = 0;
+
+   if ( *buf != ':' )
+       return buf;
+
+   buf++;
+   while( *buf ) {
+       /* tolower() is only defined for unsigned char values and EOF */
+       switch(tolower((unsigned char) *buf)) {
+           case 'a': *weight |= 1<<3; break;
+           case 'b': *weight |= 1<<2; break;
+           case 'c': *weight |= 1<<1; break;
+           case 'd': *weight |= 1;    break;
+           default: return buf;
+       }
+       buf++;
+   }
+
+   return buf;
+}
+
+/*
+ * get token from query string
+ *
+ * Returns the token kind (VAL, OPR, OPEN, CLOSE, END or ERR) and advances
+ * state->buf past the token.  For OPR, *val is the operator character.
+ * For VAL, *strval / *lenval describe the operand text (it lives in the
+ * value parser's buffer) and *weight receives its weight bitmask.
+ * Blanks between tokens are skipped.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               if (*(state->buf) == '!')
+               {
+                   /* negation: still waiting for the operand itself */
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* hand over to the tsvector value parser */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       /* continue after the value and its optional weight suffix */
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   /* more closing than opening parentheses is an error */
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* end of input with parentheses still open is an error */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       /* reached for a blank only: skip it */
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Prepend a new item to the parser's list (reverse polish notation is
+ * accumulated with the most recent item first).  'distance' and 'lenval'
+ * must be below MAXSTRPOS and MAXSTRLEN respectively.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node;
+
+   node = (NODE *) palloc(sizeof(NODE));
+   node->weight = weight;
+   node->type = type;
+   node->val = val;
+
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+
+   node->distance = distance;
+   node->length = lenval;
+
+   /* link in at the head of the list */
+   node->next = state->str;
+   state->str = node;
+   state->num += 1;
+}
+
+/*
+ * Push an operand exactly as given (used for tsquery parsing): an item
+ * carrying the crc32 of the text is added to the list and the text itself,
+ * NUL-terminated, is appended to the operand buffer.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   int4        used;
+
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             state->curop - state->op, lenval, weight);
+
+   /* double the operand buffer until text plus terminator fits */
+   used = state->curop - state->op;
+   while (used + lenval + 1 >= state->lenop)
+   {
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+       state->curop = state->op + used;
+   }
+
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop[lenval] = '\0';
+   state->curop += lenval + 1;
+   state->sumlen += lenval + 1;
+}
+
+/*
+ * Push an operand after morphological processing (used by to_tsquery).
+ *
+ * The operand text is run through parsetext_v2() with the configuration
+ * state->cfg_id.  Every resulting word is pushed as a value, and the words
+ * are joined with '&'.  When no word is left, a VALTRUE placeholder is
+ * pushed instead (VALTRUE is the marker used for stop words).
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT     prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       /* from the second word on, AND it with what was pushed before */
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 ) 
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens until END or a closing parenthesis and pushes items with
+ * pushquery() / 'pushval'.  Pending operators are kept on a local stack of
+ * STACKDEPTH entries; a parenthesised group is handled by a recursive call.
+ * Returns END on success; a syntax error is reported with elog(ERROR).
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* '&' and '!' are emitted right after their operand */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* the group up to the matching ')' is parsed recursively */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* end of this group: flush the operators still pending */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush the operators still pending */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/* Context handed to checkcondition_str() while a query is executed. */
+typedef struct
+{
+   /* first entry of the tsvector's WordEntry array */
+   WordEntry  *arrb;
+   /* one past its last entry */
+   WordEntry  *arre;
+   /* string area of the tsvector; WordEntry.pos is an offset into it */
+   char       *values;
+   /* operand area of the query; ITEM.distance is an offset into it */
+   char       *operand;
+}  CHKVAL;
+
+/*
+ * Compare a tsvector entry with a query operand: shorter sorts first,
+ * equal lengths are decided by comparing the bytes.
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   const char *entrytext;
+   const char *operandtext;
+
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   entrytext = &(chkval->values[ptr->pos]);
+   operandtext = &(chkval->operand[item->distance]);
+   return strncmp(entrytext, operandtext, item->length);
+}
+
+/*
+ * check weight info
+ *
+ * Returns true when at least one position of the entry 'val' carries a
+ * weight that is selected in item->weight (a bitmask, bit n set for weight
+ * n).  The position list follows the lexeme text at a SHORTALIGN'ed
+ * offset: a uint16 count, then that many WordEntryPos.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       if ( item->weight & ( 1 << ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false; 
+}
+
+/*
+ * Is the operand 'val' present in the tsvector described by 'checkval'?
+ * Binary search over the entry array; when the operand carries weights and
+ * the matching entry has positions, the weights must match as well.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *chk = (CHKVAL *) checkval;
+   WordEntry  *lo = chk->arrb;
+   WordEntry  *hi = chk->arre;
+
+   /* search the half-open range [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = ValCompare(chk, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+       {
+           if (val->weight && mid->haspos)
+               return checkclass_str(chk, mid, val);
+           return true;
+       }
+   }
+
+   return (false);
+}
+
+/*
+ * Evaluate the boolean query rooted at 'curitem'.
+ *
+ * Values are decided by 'chkcond'.  For an operator the right operand is
+ * the next item and the left operand is at curitem + curitem->left; the
+ * left operand is evaluated first and the right one only when needed.
+ * With 'calcnot' false a negation is not evaluated and counts as true.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   bool        leftres;
+
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
+   }
+
+   leftres = TS_execute(curitem + curitem->left, checkval, calcnot, chkcond);
+
+   if (curitem->val == (int4) '&')
+       return leftres ? TS_execute(curitem + 1, checkval, calcnot, chkcond) : false;
+
+   /* |-operator */
+   return leftres ? true : TS_execute(curitem + 1, checkval, calcnot, chkcond);
+}
+
+/*
+ * boolean operations
+ *
+ * Same as exectsq() with the two arguments in the opposite order.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(0);
+   Datum       second = PG_GETARG_DATUM(1);
+
+   return DirectFunctionCall2(exectsq, second, first);
+}
+
+/*
+ * Does the tsvector (argument 0) satisfy the query (argument 1)?
+ * An empty vector or an empty query never matches.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   bool        result = false;
+
+   if (val->size && query->size)
+   {
+       CHKVAL      chkval;
+
+       chkval.arrb = ARRPTR(val);
+       chkval.arre = chkval.arrb + val->size;
+       chkval.values = STRPTR(val);
+       chkval.operand = GETOPERAND(query);
+       result = TS_execute(GETQUERY(query), &chkval, true, checkcondition_str);
+   }
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * find left operand in polish notation view
+ *
+ * Walks the subtree that starts at ptr[*pos], fills in the 'left' field of
+ * every item in it and leaves *pos just behind the subtree.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+   if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
+   {
+       /* a value has no operands */
+       ptr[*pos].left = 0;
+       (*pos)++;
+   }
+   else if (ptr[*pos].val == (int4) '!')
+   {
+       /* unary operator: its only operand is the next item */
+       ptr[*pos].left = 1;
+       (*pos)++;
+       findoprnd(ptr, pos);
+   }
+   else
+   {
+       ITEM       *curitem = &ptr[*pos];
+       int4        tmp = *pos;
+
+       /* binary operator: the right operand comes first, the left one after it */
+       (*pos)++;
+       findoprnd(ptr, pos);
+       curitem->left = *pos - tmp;
+       findoprnd(ptr, pos);
+   }
+}
+
+
+/*
+ * input
+ *
+ * Parse the query string 'buf' into a palloc'd QUERYTYPE.  'pushval' is
+ * called for every operand (pushval_asis or pushval_morph) and 'cfg_id' is
+ * stored in the parser state for it.  An empty query is reported with
+ * elog(ERROR).
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   /* the list holds the most recent item first, so the root ends up at ptr[0] */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * Input function of tsquery: the operands are taken as written,
+ * no morphology is applied (configuration id 0 is passed along).
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   char       *input = (char *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *parsed = queryin(input, pushval_asis, 0);
+
+   PG_RETURN_POINTER(parsed);
+}
+
+/*
+ * out function
+ *
+ * State of the infix printer.
+ */
+typedef struct
+{
+   /* item that is printed next */
+   ITEM       *curpol;
+   /* start of the output buffer (palloc'd, grown by RESIZEBUF) */
+   char       *buf;
+   /* current end of the output inside buf */
+   char       *cur;
+   /* operand area of the query; ITEM.distance is an offset into it */
+   char       *op;
+   /* allocated size of buf */
+   int4        buflen;
+}  INFIX;
+
+/*
+ * Make room for 'addsize' more bytes plus a terminator in the output
+ * buffer of 'inf' (a pointer to INFIX): the buffer size is doubled until
+ * they fit, and inf->cur is re-pointed into the reallocated buffer.
+ * The macro expands to a bare while statement, not a do { } while (0).
+ */
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the subtree starting at in->curpol to the end of in->buf and
+ * leaves in->curpol just behind that subtree.  An '|' expression is put in
+ * parentheses unless 'first' is true.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       /* worst case: every byte escaped, two quotes, ':' and four weight letters */
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           /* a quote inside the operand is escaped with a backslash */
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       /* a negated operator expression is wrapped in parentheses */
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm;
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       /*
+        * The right operand is stored first but printed last, so it is
+        * rendered into a scratch buffer of its own.
+        */
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function of tsquery: the query printed in infix form as a
+ * palloc'd C string; an empty query gives an empty string.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       out;
+
+   if (query->size == 0)
+   {
+       char       *empty = palloc(1);
+
+       empty[0] = '\0';
+       PG_RETURN_POINTER(empty);
+   }
+
+   out.buflen = 32;
+   out.buf = (char *) palloc(sizeof(char) * out.buflen);
+   out.cur = out.buf;
+   out.cur[0] = '\0';
+   out.curpol = GETQUERY(query);
+   out.op = GETOPERAND(query);
+   infix(&out, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(out.buf);
+}
+
+/*
+ * Debug function: shows, as text, the query that will be executed on
+ * non-leaf index pages, i.e. the query after clean_NOT_v2().
+ * An empty query gives an empty text; "T" is returned when nothing is
+ * left of the query after cleaning.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       out;
+   text       *res;
+   ITEM       *cleaned;
+   int4        len;
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       PG_RETURN_POINTER(res);
+   }
+
+   cleaned = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (cleaned)
+   {
+       int4        outlen;
+
+       out.curpol = cleaned;
+       out.buflen = 32;
+       out.buf = (char *) palloc(sizeof(char) * out.buflen);
+       out.cur = out.buf;
+       out.cur[0] = '\0';
+       out.op = GETOPERAND(query);
+       infix(&out, true);
+
+       /* copy the printed text, without its terminator, into a text value */
+       outlen = out.cur - out.buf;
+       res = (text *) palloc(outlen + VARHDRSZ);
+       VARATT_SIZEP(res) = outlen + VARHDRSZ;
+       strncpy(VARDATA(res), out.buf, outlen);
+       pfree(cleaned);
+   }
+   else
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * to_tsquery(cfg_id, text): parse the text into a query, running every
+ * operand through the morphology of configuration 'cfg_id' (argument 0).
+ * Placeholder items are removed with clean_fakeval_v2(); when nothing is
+ * left, an empty query is returned.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text    *in = PG_GETARG_TEXT_P(1);
+   char *str;
+   QUERYTYPE  *query;
+   ITEM       *res;
+   int4        len;
+
+   str=text2char(in);
+   PG_FREE_IF_COPY(in,1);
+
+   query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+   res = clean_fakeval_v2(GETQUERY(query), &len);
+   if (!res)
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+       PG_RETURN_POINTER(query);
+   }
+   /*
+    * The cleaned tree overwrites the start of the item array.
+    * NOTE(review): query->size and query->len keep their values from
+    * before the cleaning -- confirm that this is intended.
+    */
+   memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM));
+   pfree(res);
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * to_tsquery(cfg_name, text): look up the configuration by name
+ * (argument 0) and hand over to to_tsquery() with its id.
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+
+   /*
+    * 'name' was fetched from argument 0, so that is the index to compare
+    * against; with any other index the caller's own datum could be freed.
+    */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsquery(text): same as to_tsquery() with the id returned by
+ * get_currcfg() as the configuration.
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum   cfg = Int32GetDatum( get_currcfg() );
+   Datum   res = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));
+
+   PG_RETURN_DATUM(res);
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * item in polish notation with back link
+ * to left operand
+ *
+ * A query is a flat array of ITEMs in prefix (polish) order.  For an
+ * operator item the right operand is the item that immediately follows it;
+ * for a binary operator the left operand is found `left` items after the
+ * operator (this is how maketree() in rewrite.c walks the array).
+ */
+typedef struct ITEM
+{
+   int8        type;       /* item kind: VAL, OPR, VALTRUE, ... */
+   int8        weight;
+   int2        left;       /* offset in items from an operator to its left operand */
+   int4        val;        /* for operators, the operator character ('!', '&', '|') */
+   /* user-friendly value, must correlate with WordEntry */
+   uint32  
+       unused:1,
+       length:11,          /* length of the operand text */
+       distance:20;        /* offset of the operand text within the operand area */
+}  ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ *
+ * `size` is the number of ITEMs in the array; the operand text area starts
+ * right after the last ITEM (see GETQUERY / GETOPERAND).  `len` is set to
+ * HDRSIZEQT for an empty query.
+ * NOTE(review): `len` looks like the total byte length of the value -
+ * confirm against the code that builds it.
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];    /* variable-length payload: items, then operand text */
+}  QUERYTYPE;
+
+/*
+ * Layout helpers for QUERYTYPE.
+ *
+ * HDRSIZEQT    - size of the fixed header (the two int4 fields)
+ * COMPUTESIZE  - total bytes for `size` items plus `lenofoperand` bytes of text
+ * GETQUERY     - pointer to the first ITEM
+ * GETOPERAND   - pointer to the operand text that follows the ITEM array
+ * ISOPERATOR   - true for the characters '!', '&', '|', '(' and ')'
+ *
+ * Every macro argument and every macro result is parenthesized, so that
+ * expression arguments (a + b) and postfix use (GETQUERY(q)[i]) expand with
+ * the intended precedence.
+ */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+/*
+ * Type codes.  VAL, OPR, VALTRUE and VALFALSE are stored in ITEM.type.
+ * NOTE(review): END, ERR, OPEN and CLOSE look like parser token codes -
+ * confirm against the query parser.
+ */
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+/*
+ * Evaluate the query tree rooted at curitem.  chkcond is called with
+ * checkval and an ITEM to decide whether that item matches.
+ * NOTE(review): the exact meaning of calcnot is defined by the
+ * implementation, which is not in this header - see TS_execute's definition.
+ */
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevance ranking
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+/* SQL-callable entry points (version-1 calling convention) */
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+/* default per-weight-class multipliers, indexed by WordEntryPos.weight */
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+/*
+ * Weight multiplier for the position entry wep.  Relies on a variable named
+ * `w` (the active weight table) being in scope at the point of use.
+ */
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+/* normalization method used when the caller does not pass one (0 = none) */
+#define DEF_NORM_METHOD    0
+
+/*
+ * Weight of a word collocation as a function of the distance w between
+ * the two words.  Distances above 100 get a fixed, practically zero weight.
+ */
+static float4 word_distance ( int4 w ) {
+   double  arg;
+
+   if ( w>100 )
+       return 1e-30;
+
+   arg = ((float4)w)/1.5-2;
+   return 1.0/(1.005+0.05*exp(arg));
+}
+
+/*
+ * Length of the document held in t, counted in positions: each entry
+ * contributes its number of stored positions, or 1 if it has none.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry   *cur, *stop=(WordEntry*)STRPTR(t);
+   int total = 0;
+
+   for(cur=ARRPTR(t); cur<stop; cur++) {
+       int npos = POSDATALEN(t, cur);
+
+       total += ( npos == 0 ) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Compare a tsvector entry with a query operand.  Shorter strings sort
+ * first; strings of equal length are ordered by strncmp.  eval and qval
+ * are the bases of the tsvector and query text areas respectively.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+        if (ptr->len != item->length)
+                return (ptr->len > item->length) ? 1 : -1;
+
+        return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary-search the entry array of t for the operand of the query item.
+ * Returns the matching WordEntry, or NULL if the word is not in t.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+        WordEntry  *lo = ARRPTR(t);
+        WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+        /* search the half-open range [lo, hi) */
+        while (lo < hi)
+        {
+                WordEntry  *mid = lo + (hi - lo) / 2;
+                int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+                if (cmp == 0)
+                        return mid;
+                if (cmp < 0)
+                        lo = mid + 1;
+                else
+                        hi = mid;
+        }
+
+        return NULL;
+}
+
+/*
+ * Stand-in position list for entries that carry no position data.  Users
+ * overwrite the storage of the first element with a uint16 count
+ * (lengthof(POSNULL)-1) and treat POSNULL+1 as the single pseudo-position,
+ * which mimics the count-then-positions layout read from real entries.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank for a query whose root operator is '&'.
+ *
+ * For every pair of distinct query words found in t, and every pair of
+ * their positions, a weight is computed from the two position weights (via
+ * w) and the distance between the positions; the weights are combined as
+ * 1 - (1-res)*(1-curw).  Returns -1.0 if no pair contributed.
+ *
+ * The loop bounds below were restored after being truncated in this copy
+ * of the source.
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* pos[i] stays NULL for operators and for words not present in t */
+   memset(pos,0,sizeof(uint16*) * q->size);
+   /* store the entry count in POSNULL's leading slot */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+       /* position data is a uint16 count followed by the positions */
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+
+       /* pair word i with every earlier word k that was found */
+       for( k=0; k<i; k++ ) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   /* zero distance only counts when one side has no real positions */
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw;
+                       if ( !dist ) dist=MAXENTRYPOS;
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res;
+}
+
+/*
+ * Rank for a query whose root operator is not '&'.
+ *
+ * Every position of every query word found in t contributes its position
+ * weight (via w); contributions are combined as 1 - (1-res)*(1-weight).
+ * Returns -1.0 if no query word occurs in t.
+ *
+ * The loop bounds below were restored after being truncated in this copy
+ * of the source.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* store the entry count in POSNULL's leading slot */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           /* no stored positions: use the single pseudo-position */
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Compute the rank of document t for query q using weight table w.
+ *
+ * Dispatches to calc_rank_and() when the root item is the '&' operator and
+ * to calc_rank_or() otherwise; a negative result (nothing matched) is
+ * replaced by 1e-20.  The result is then normalized:
+ *   method 0 - no normalization
+ *   method 1 - divide by log(document length)
+ *   method 2 - divide by document length
+ * Any other method raises an error.  Returns 0.0 for an empty document or
+ * an empty query.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+   double lnlen;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   if ( res < 0 )
+       res = 1e-20;
+
+   switch(method) {
+       case 0: break;
+       case 1:
+           /*
+            * log(1) == 0, so a document of length 1 would divide by zero
+            * and yield an infinite rank; leave such a rank unnormalized.
+            */
+           lnlen = log((float)cnt_length(t));
+           if ( lnlen > 0 )
+               res /= lnlen;
+           break;
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   return res;
+}
+
+/*
+ * rank(weights float4[], tsvector, query [, method int4])
+ *
+ * Argument 0 must be a one-dimensional array with at least
+ * lengthof(weights) elements.  A negative element selects the built-in
+ * default for that slot; an element greater than 1.0 is an error.  The
+ * optional fourth argument selects the normalization method.
+ *
+ * The loop bound below was restored after being truncated in this copy of
+ * the source.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 )
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+       elog(ERROR,"Array of weight is too short");
+
+   for(i=0;i<lengthof(weights);i++) {
+       /* a negative user weight means "use the default for this slot" */
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 )
+           elog(ERROR,"Weight out of range");
+   }
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method);
+
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank_def(tsvector, query [, method int4])
+ *
+ * Same as rank() but always uses the built-in default weight table.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int method;
+   float res;
+
+   method = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   res = calc_rank(weights, txt, query, method);
+
+   PG_FREE_IF_COPY(txt, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * One occurrence of a query word in the document: which query item
+ * matched and at which position.  get_docrep() builds an array of these,
+ * sorted by pos.
+ */
+typedef struct {
+   ITEM    *item;
+   int32   pos;
+} DocRepresentation;
+
+/*
+ * qsort comparator ordering DocRepresentation entries by ascending pos.
+ *
+ * Equal positions compare as 0.  Returning a non-zero value for equal keys
+ * makes compare(a,b) and compare(b,a) both claim "greater", which violates
+ * the consistency qsort requires of its comparator.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * A window of `len` consecutive DocRepresentation entries starting at
+ * `doc`; passed as the checkval of checkcondition_DR().
+ */
+typedef struct {
+   DocRepresentation *doc;
+   int len;
+}  ChkDocR;
+
+/*
+ * TS_execute callback: true if the query item val occurs anywhere in the
+ * ChkDocR window passed as checkval.
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *chk = (ChkDocR*)checkval;
+   int i;
+
+   for(i=0; i<chk->len; i++)
+       if ( chk->doc[i].item == val )
+           return true;
+
+   return false;
+}
+
+
+/*
+ * Find the next cover: a span of document positions [*p, *q] whose query
+ * words satisfy the query.
+ *
+ * doc/len  - position-sorted occurrences of query words (see get_docrep)
+ * pos      - in/out index into doc where the search starts; advanced past
+ *            the start of the span that was examined
+ * p, q     - out: first and last position of the cover; *q is also read on
+ *            entry to detect a span that was already reported
+ *
+ * Returns true when a cover was found and false when none remains.
+ *
+ * The loop bounds below were restored after being truncated in this copy
+ * of the source, and the arbitrary 0xffffffff pointer sentinel was
+ * replaced by NULL (f is only used after it has been assigned).
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=NULL;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* forward pass: *q = furthest "first occurrence at or after *pos" of any word */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               }
+               break;
+           }
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   }
+
+   /* backward pass: *p = earliest "last occurrence at or before lastpos" of any word */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+
+   if ( *p<=*q ) {
+       /* test the query against the words inside the candidate span only */
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q);
+   }
+
+   return false;
+}
+
+/*
+ * Build the position-sorted list of all occurrences of query words in txt.
+ *
+ * Each VAL item of the query that is found in txt contributes one
+ * DocRepresentation per stored position (or one pseudo-position when the
+ * entry has none).  *doclen receives the number of entries.  Returns a
+ * palloc'd array, or NULL when no query word occurs in txt.
+ *
+ * The loop bounds below were restored after being truncated in this copy
+ * of the source.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   /* store the entry count in POSNULL's leading slot */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until this word's positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+
+   if ( cur>0 ) {
+       if ( cur>1 )
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd(K int4, tsvector, query [, method int4])
+ *
+ * Cover-based rank: every cover [p,q] found by Cover() adds 1.0 when its
+ * width q-p+1 is at most K and K/width otherwise.  A non-positive K is
+ * replaced by 4.  The sum is normalized like calc_rank():
+ *   method 0 - none, 1 - divide by log(length), 2 - divide by length.
+ * Returns 0.0 when no query word occurs in the document.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   double  lnlen;
+   int p=0,q=0,len,cur;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;
+   while( Cover(doc, len, query, &cur, &p, &q) )
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   switch(method) {
+       case 0: break;
+       case 1:
+           /*
+            * log(1) == 0, so a document of length 1 would divide by zero
+            * and yield an infinite rank; leave such a rank unnormalized.
+            */
+           lnlen = log((float)cnt_length(txt));
+           if ( lnlen > 0 )
+               res /= lnlen;
+           break;
+       case 2: res /= (float)cnt_length(txt); break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd_def(tsvector, query [, method int4])
+ *
+ * Calls rank_cd() with K = -1 (which rank_cd replaces by its default) and
+ * the default normalization method unless a third argument is given.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum   method = ( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD);
+   Datum   res = DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   );
+
+   PG_RETURN_DATUM( res );
+}
+
+/**************debug*************/
+
+/*
+ * One word occurrence of the document, used by get_covers() to print the
+ * text with cover markers.
+ */
+typedef struct {
+   char    *w;         /* word text (points into the tsvector, not terminated) */
+   int2    len;        /* length of w */
+   int2    pos;        /* position of this occurrence */
+   int2    start;      /* number of the cover that begins here, 0 if none */
+   int2    finish;     /* number of the cover that ends here, 0 if none */
+} DocWord;
+
+/*
+ * qsort comparator ordering DocWord entries by ascending pos.
+ *
+ * Equal positions compare as 0.  Returning a non-zero value for equal keys
+ * makes compare(a,b) and compare(b,a) both claim "greater", which violates
+ * the consistency qsort requires of its comparator.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers(tsvector, query) - debugging aid.
+ *
+ * Returns the words of the document in position order, separated by
+ * spaces, with "{N " written before the first word of cover number N and
+ * "}N " after its last word.  Every entry of the tsvector must carry
+ * position data, otherwise an error is raised.  Returns an empty text when
+ * no query word occurs in the document.
+ *
+ * The loop bounds below were restored after being truncated in this copy
+ * of the source, and the scans over dw now test the array bound before
+ * dereferencing the element.
+ */
+Datum
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences in the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size budget */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;
+           dw[cur].len=pptr[i].len;
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark the first and last word of every cover */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++;
+   }
+
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) {
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/*
+ * Pointer-linked form of a query tree.  valnode points at the ITEM this
+ * node was built from; left is NULL for values and for the unary '!'
+ * operator.
+ */
+typedef struct NODE
+{
+   struct NODE *left;
+   struct NODE *right;
+   ITEM       *valnode;
+}  NODE;
+
+/*
+ * Build a pointer-linked tree from the plain (array) form of a query.
+ * The right operand of an operator is the next item; the left operand of
+ * a binary operator is in->left items further on.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *n = (NODE *) palloc(sizeof(NODE));
+
+   n->valnode = in;
+   n->left = NULL;
+   n->right = NULL;
+
+   /* anything but an operator is a leaf */
+   if (in->type != OPR)
+       return n;
+
+   n->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       n->left = maketree(in + in->left);
+
+   return n;
+}
+
+/* Growable output buffer used while flattening a NODE tree into ITEMs */
+typedef struct
+{
+   ITEM       *ptr;        /* item array */
+   int4        len;        /* allocated number of items */
+   int4        cur;        /* number of items written so far */
+}  PLAINTREE;
+
+/*
+ * Append the subtree rooted at node to state in prefix order (the node's
+ * item, then its right subtree, then its left subtree), recomputing each
+ * operator's `left` offset for the new layout.  The output buffer is
+ * doubled when full.  Every visited NODE is freed.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+   memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM));
+   if (node->valnode->type == VAL)
+       state->cur++;
+   else if (node->valnode->val == (int4) '!')
+   {
+       /* unary operator: its only operand follows immediately */
+       state->ptr[state->cur].left = 1;
+       state->cur++;
+       plainnode(state, node->right);
+   }
+   else
+   {
+       /* remember the operator's index; state->ptr may move while recursing */
+       int4        cur = state->cur;
+
+       state->cur++;
+       plainnode(state, node->right);
+       /* the left operand starts right after the flattened right subtree */
+       state->ptr[cur].left = state->cur - cur;
+       plainnode(state, node->left);
+   }
+   pfree(node);
+}
+
+/*
+ * Flatten a NODE tree back into the plain (array) form.  *len receives
+ * the number of items written.  Returns NULL with *len == 0 when root is
+ * NULL or is neither a value nor an operator.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   st;
+
+   st.ptr = NULL;
+   st.cur = 0;
+   st.len = 16;
+
+   if (root != NULL &&
+       (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       st.ptr = (ITEM *) palloc(st.len * sizeof(ITEM));
+       plainnode(&st, root);
+   }
+
+   *len = st.cur;
+   return st.ptr;
+}
+
+/* Recursively free node and everything below it; a NULL node is a no-op */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive call handles NULL children itself */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Remove every '!' operator from the tree.
+ * This is useful for debugging; otherwise the resulting view is the one
+ * used when searching in an index.  A '!' subtree is treated as always
+ * TRUE:
+ *   - a value is kept as is;
+ *   - a '!' subtree is freed and NULL is returned;
+ *   - '|' becomes NULL (and is freed) if either operand became NULL;
+ *   - '&' becomes NULL if both operands became NULL, and is replaced by
+ *     the surviving operand if only one did.
+ * Returns the cleaned subtree or NULL.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* if the left side is NULL the right side is not cleaned; freetree releases it */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a newly built plain-form copy of the query at ptr with all '!'
+ * operators removed; *len receives the number of items.  Returns NULL if
+ * nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree = clean_NOT_intree(maketree(ptr));
+
+   return plaintree(tree, len);
+}
+
+/* constant-folding verdict for a removed subtree */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Remove from the query tree the values that are always present in any
+ * text (stop words, stored as VALTRUE items) and fold the operators above
+ * them.
+ *
+ * Returns the cleaned subtree.  When the whole subtree folds to a constant
+ * it is freed, NULL is returned and *result is set to V_TRUE or V_FALSE;
+ * otherwise *result is left untouched (callers initialize it to V_UNKNOWN).
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* negation of a constant operand is the opposite constant */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           /* TRUE | x is TRUE */
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           /* FALSE | x is x */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       /* remaining operator: '&' */
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           /* FALSE & x is FALSE */
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           /* TRUE & x is x */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a newly built plain-form copy of the query at ptr with stop-word
+ * values removed; *len receives the number of items.  If the whole query
+ * folds to a constant, a NOTICE is emitted, *len is set to 0 and NULL is
+ * returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &verdict);
+
+   if (verdict == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/*
+ * Both functions take a query in plain (array) form and return a newly
+ * allocated cleaned copy, storing its item count in *len; they return
+ * NULL when nothing is left.
+ */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/* qsort/bsearch comparator: order SNMapEntry elements by key (strcmp) */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *ea = (const SNMapEntry*)a;
+   const SNMapEntry *eb = (const SNMapEntry*)b;
+
+   return strcmp( ea->key, eb->key );
+}
+
+/*
+ * Add (key, value) to the map.  The key is duplicated with strdup, the
+ * entry array grows by doubling (starting at 16) and is kept sorted by
+ * key after every insertion.  Raises an error when memory runs out.
+ */
+void
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if (map->len>=map->reallen) {
+       SNMapEntry *grown;
+       int newlen = 16;
+
+       if ( map->reallen )
+           newlen = 2*map->reallen;
+       grown=(SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newlen);
+       if ( !grown )
+           elog(ERROR, "No memory");
+       map->list=grown;
+       map->reallen=newlen;
+   }
+
+   slot = map->list + map->len;
+   slot->key = strdup(key);
+   if ( ! slot->key )
+       elog(ERROR, "No memory");
+   slot->value=value;
+   map->len++;
+
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/* Same as addSNMap, with the key given as a text value */
+void
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *cstr;
+
+   cstr = text2char( key );
+   addSNMap(map, cstr, value);
+   pfree(cstr);
+}
+
+/*
+ * Look key up in the map with a binary search.  Returns the stored value,
+ * or 0 when the map is empty or the key is absent.
+ */
+Oid
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if ( !map->list || map->len==0 )
+       return 0;
+
+   probe.key = key;
+   probe.value = 0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( !hit )
+       return 0;
+
+   return hit->value;
+}
+
+/*
+ * Same as findSNMap, with the key given as a text value.  Returns the
+ * stored Oid, or 0 when the key is absent.
+ */
+Oid
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* holds the result in its own type rather than a signed int */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/* Free all keys and the entry array, then reset the map to all zeroes */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       int i;
+
+       for(i=0; i<map->len; i++)
+           if ( map->list[i].key ) free(map->list[i].key);
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* one map entry: a malloc'd, NUL-terminated key and its value */
+typedef struct {
+   char    *key;
+   Oid value;
+} SNMapEntry;
+
+/* map from string to Oid, stored as an array kept sorted by key */
+typedef struct {
+   int len;        /* number of entries in use */
+   int reallen;    /* number of entries allocated */
+   SNMapEntry  *list;
+} SNMap;
+
+/* add an entry; the key is copied */
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+/* look a key up; returns 0 when it is not found */
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+/* release all memory held by the map and zero it */
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a zero-initialized stemmer environment with S_size string
+ * slots, I_size integer slots and B_size symbol slots.  The current string
+ * p and every S slot are created with create_s().
+ *
+ * NOTE(review): none of the calloc() results are checked before use, so
+ * an allocation failure leads to a NULL dereference here.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        z->B_size = B_size;
+    }
+
+    return z;
+}
+
+/*
+ * Release an environment created by SN_create_env: every S string (via
+ * lose_s), the S, I and B arrays, the current string p, and z itself.
+ * The arrays are freed only when their recorded size is non-zero.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    if (z->S_size)
+    {
+        {   int i;
+            for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
+        }
+        free(z->S);
+    }
+    if (z->I_size) free(z->I);
+    if (z->B_size) free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Make the `size` symbols at s the current string of z, by replacing the
+ * range 0..z->l through replace_s(), and reset the cursor c to 0.
+ * NOTE(review): assumes z->l is the current string's end offset - confirm
+ * against replace_s() in utilities.c.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/*
+ * State of one stemmer instance.  Allocated by SN_create_env and released
+ * by SN_close_env.
+ */
+struct SN_env {
+    symbol * p;     /* current string, created with create_s() */
+    /* NOTE(review): presumably cursor, limits and slice marks - see header.h / utilities.c */
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;     /* element counts of S, I and B */
+    symbol * * S;   /* S_size strings, each created with create_s() */
+    int * I;        /* I_size integers */
+    symbol * B;     /* B_size symbols */
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/*
+ * Forward declarations.  english_stem, english_create_env and
+ * english_close_env are the exported entry points; every r_* routine is
+ * private to this file and is reached only through english_stem.  Each
+ * routine takes the stemmer environment and returns an int that its callers
+ * test for zero / non-zero.
+ */
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+/* Environment constructor / destructor for this stemmer */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/*
+ * a_0: one-entry table ("gener") that r_mark_regions tries with find_among
+ * before falling back to its grouping scan.
+ * Entry layout (struct among): { s_size, s, substring_i, result, function }.
+ */
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+/*
+ * a_1: table searched by r_Step_1a through find_among_b.
+ * Entry layout (struct among): { s_size, s, substring_i, result, function }.
+ * Results 1, 2 and 3 select a case in r_Step_1a's switch; entries with
+ * result -1 ("ss", "us") match but have no case there, so the word is left
+ * untouched.
+ */
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+/*
+ * a_2: table used for the second lookup in r_Step_1b (find_among_b), after
+ * the matched ending has been deleted.
+ * Entry layout (struct among): { s_size, s, substring_i, result, function }.
+ * Entry 0 is the empty string (size 0, no symbol array) with result 3; the
+ * remaining entries name it as their substring_i.  Result 1 = "at"/"bl"/"iz",
+ * result 2 = the doubled letters.
+ */
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+/*
+ * a_3: table used for the first lookup in r_Step_1b (find_among_b).
+ * Entry layout (struct among): { s_size, s, substring_i, result, function }.
+ * Result 1 = "eed"/"eedly", result 2 = "ed"/"ing"/"edly"/"ingly".
+ */
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+/*
+ * a_4: table searched by r_Step_2 through find_among_b.
+ * Entry layout (struct among): { s_size, s, substring_i, result, function }.
+ * The result (1..16) selects the case of r_Step_2's switch, i.e. which
+ * replacement is written over the matched slice.
+ */
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+/*
+ * a_5: table searched by r_Step_3 through find_among_b.
+ * Entry layout (struct among): { s_size, s, substring_i, result, function }.
+ * The result (1..6) selects the case of r_Step_3's switch.
+ */
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+/*
+ * a_6: table searched by r_Step_4 through find_among_b.
+ * Entry layout (struct among): { s_size, s, substring_i, result, function }.
+ * Every entry has result 1 (plain deletion in r_Step_4) except "ion", whose
+ * result 2 selects the case that first checks the preceding symbol.
+ */
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+/*
+ * a_7: table searched by r_Step_5 through find_among_b.
+ * Entry layout (struct among): { s_size, s, substring_i, result, function }.
+ * Result 1 = "e", result 2 = "l".
+ */
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+/*
+ * a_8: table searched by r_exception2 through find_among_b.
+ * Entry layout (struct among): { s_size, s, substring_i, result, function }.
+ * Every result is -1: r_exception2 only tests whether anything matched.
+ */
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+/*
+ * a_9: table searched by r_exception1 through find_among (forward search).
+ * Entry layout (struct among): { s_size, s, substring_i, result, function }.
+ * Results 1..11 select the replacement written by r_exception1; entries
+ * with result -1 have no case in its switch, so the word is accepted
+ * unchanged.
+ */
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+/*
+ * Character groupings handed to in_grouping / out_grouping and their _b
+ * variants together with a [min, max] character-code range:
+ *   g_v        is always used with 97..121,
+ *   g_v_WXY    with 89..121,
+ *   g_valid_LI with 99..116.
+ * NOTE(review): presumably each array is a bitmap holding one bit per code
+ * in that range (g_v would then be the vowels a e i o u y) - confirm against
+ * the grouping routines in the runtime.
+ */
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+/*
+ * Literal symbol sequences used by the routines below.  They are passed,
+ * always together with an explicit length, to eq_s / eq_s_b (comparison),
+ * slice_from_s (replacement) and insert_s (insertion); no terminating NUL
+ * is stored.  Identical contents appear more than once because each use
+ * site in the generated code has its own array.
+ */
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/*
+ * prelude: mark certain 'y' characters by rewriting them as 'Y'.
+ *
+ *  1. Clears the Y_found flag (z->B[0]).
+ *  2. If the text at the cursor is 'y' and the g_v test right after it
+ *     succeeds, that 'y' is replaced by 'Y'.
+ *  3. Repeatedly scans forward for a position where the g_v test succeeds
+ *     and is immediately followed by 'y'; each such 'y' is replaced by 'Y'.
+ *
+ * Y_found is set whenever a replacement is made.  The cursor is restored
+ * to its entry value before returning.  Always returns 1.
+ *
+ * Compared with the generated original, the never-targeted label `lab1`
+ * is gone and each saved cursor has its own name instead of shadowing an
+ * outer `c`; the control flow itself is unchanged.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int do_start = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = do_start;
+    }
+    {   int do_start = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int repeat_start = z->c;
+            while(1) { /* goto, line 26 */
+                int probe = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = probe; /* match: go back to where the probe began */
+                break;
+            lab3:
+                z->c = probe;
+                if (z->c >= z->l) goto lab2; /* nothing left to scan */
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = repeat_start;
+            break;
+        }
+        z->c = do_start;
+    }
+    return 1;
+}
+
+/*
+ * mark_regions: compute the two region marks z->I[0] (p1) and z->I[1] (p2).
+ *
+ * Both marks start out at the limit z->l.  p1 is set either right after a
+ * match in table a_0, or after the first "g_v test succeeds, then the
+ * out-of-g_v test succeeds" pair found while scanning forward.  p2 is set
+ * after the next such pair.  If the limit is reached during any scan the
+ * routine jumps to lab0, leaving the marks not yet assigned at z->l.
+ *
+ * The cursor is restored to its entry value.  Always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            /* no table match: rewind and scan instead */
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        /* common exit: undo all cursor movement */
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * shortv: backward test made of two alternatives, tried in order.
+ *
+ *   a) out-of-g_v_WXY, then in-g_v, then out-of-g_v all succeed;
+ *   b) (after rewinding the cursor) out-of-g_v, then in-g_v succeed and
+ *      the cursor has reached the backward limit z->lb.
+ *
+ * Returns 1 if either alternative holds, 0 otherwise.
+ */
+static int r_shortv(struct SN_env * z) {
+    const int from_end = z->l - z->c; /* cursor position, measured from the limit */
+
+    if (out_grouping_b(z, g_v_WXY, 89, 121) &&
+        in_grouping_b(z, g_v, 97, 121) &&
+        out_grouping_b(z, g_v, 97, 121))
+        return 1;
+
+    /* first alternative failed: rewind and try the second one */
+    z->c = z->l - from_end;
+    return out_grouping_b(z, g_v, 97, 121) &&
+           in_grouping_b(z, g_v, 97, 121) &&
+           z->c <= z->lb; /* atlimit */
+}
+
+/* R1: 1 when the cursor is at or beyond mark p1 (z->I[0]), otherwise 0 */
+static int r_R1(struct SN_env * z) {
+    return z->I[0] <= z->c;
+}
+
+/* R2: 1 when the cursor is at or beyond mark p2 (z->I[1]), otherwise 0 */
+static int r_R2(struct SN_env * z) {
+    return z->I[1] <= z->c;
+}
+
+/*
+ * Step 1a: look the ending up in table a_1 (backward search) and act on
+ * the result.
+ *
+ *   1 ("sses")        replace the slice by "ss".
+ *   2 ("ied", "ies")  replace by "ie" when exactly one symbol precedes the
+ *                     slice (stepping back once reaches z->lb), otherwise
+ *                     by "i".
+ *   3 ("s")           step back one symbol, then keep stepping back until
+ *                     the g_v test succeeds; delete the slice.  Returns 0
+ *                     without deleting if the backward limit is hit first.
+ *  -1 ("ss", "us")    no case: nothing is changed.
+ *
+ * Returns 0 when nothing in a_1 matches or case 3 gives up, 1 otherwise.
+ */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                /* more (or fewer) than one symbol before the slice */
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1b: look the ending up in table a_3 (backward search).
+ *
+ *   result 1 ("eed", "eedly"): requires R1; the slice becomes "ee".
+ *   result 2 ("ed", "ing", "edly", "ingly"):
+ *       - test: stepping backwards, the g_v test must succeed somewhere
+ *         before the backward limit, else return 0;
+ *       - delete the slice;
+ *       - test-lookup what is now before the cursor in table a_2:
+ *           1 ("at", "bl", "iz")   insert "e" at the cursor;
+ *           2 (doubled letter)     delete the symbol before the cursor;
+ *           3 (empty-string entry) only if the cursor is at p1 (z->I[0])
+ *                                  and r_shortv holds: insert "e";
+ *                                  otherwise return 0.
+ *
+ * Returns 0 on no match or on any failed condition above, 1 otherwise.
+ */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                /* it was only a test: put the cursor back */
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1c: if the symbol before the cursor is 'y' or 'Y', the out-of-g_v
+ * test before it succeeds, and the cursor has then not reached the
+ * backward limit, the 'y'/'Y' is replaced by 'i'.
+ *
+ * Returns 1 after the replacement, 0 if any condition fails.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    int from_end;
+
+    z->ket = z->c; /* [ */
+    from_end = z->l - z->c;
+    if (!eq_s_b(z, 1, s_10)) {
+        /* not 'y': rewind and try 'Y' */
+        z->c = z->l - from_end;
+        if (!eq_s_b(z, 1, s_11)) return 0;
+    }
+    z->bra = z->c; /* ] */
+
+    if (!out_grouping_b(z, g_v, 97, 121)) return 0;
+    if (z->c <= z->lb) return 0; /* not atlimit */
+
+    slice_from_s(z, 1, s_12);
+    return 1;
+}
+
+/*
+ * Step 2: look the ending up in table a_4 (backward search); it must lie
+ * in R1.  Most results simply name a replacement for the slice; two carry
+ * an extra condition:
+ *
+ *   13 ("ogi")  needs an 'l' before the cursor, then becomes "og";
+ *   16 ("li")   needs the g_valid_LI test to succeed, then is deleted.
+ *
+ * Returns 0 on no match, when R1 fails, or when one of those extra
+ * conditions fails; 1 otherwise.
+ */
+static int r_Step_2(struct SN_env * z) {
+    symbol * repl = 0;  /* replacement chosen by the switch, if any */
+    int repl_size = 0;
+    int among_var;
+
+    z->ket = z->c; /* [ */
+    among_var = find_among_b(z, a_4, 24);
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ] */
+    if (!r_R1(z)) return 0;
+
+    switch (among_var) {
+        case 1:  repl = s_13; repl_size = 4; break;
+        case 2:  repl = s_14; repl_size = 4; break;
+        case 3:  repl = s_15; repl_size = 4; break;
+        case 4:  repl = s_16; repl_size = 4; break;
+        case 5:  repl = s_17; repl_size = 3; break;
+        case 6:  repl = s_18; repl_size = 3; break;
+        case 7:  repl = s_19; repl_size = 3; break;
+        case 8:  repl = s_20; repl_size = 2; break;
+        case 9:  repl = s_21; repl_size = 3; break;
+        case 10: repl = s_22; repl_size = 3; break;
+        case 11: repl = s_23; repl_size = 3; break;
+        case 12: repl = s_24; repl_size = 3; break;
+        case 13:
+            if (!eq_s_b(z, 1, s_25)) return 0;
+            repl = s_26; repl_size = 2;
+            break;
+        case 14: repl = s_27; repl_size = 3; break;
+        case 15: repl = s_28; repl_size = 4; break;
+        case 16:
+            if (!in_grouping_b(z, g_valid_LI, 99, 116)) return 0;
+            slice_del(z);
+            break;
+    }
+    if (repl) slice_from_s(z, repl_size, repl);
+    return 1;
+}
+
+/*
+ * Step 3: look the ending up in table a_5 (backward search); it must lie
+ * in R1.  Results 1-4 replace the slice ("tion", "ate", "al", "ic"),
+ * result 5 deletes it, result 6 deletes it only when it also lies in R2.
+ *
+ * Returns 0 on no match or when a region test fails, 1 otherwise.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [ */
+    among_var = find_among_b(z, a_5, 9);
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ] */
+    if (!r_R1(z)) return 0;
+
+    if (among_var == 1) {
+        slice_from_s(z, 4, s_29);
+    } else if (among_var == 2) {
+        slice_from_s(z, 3, s_30);
+    } else if (among_var == 3) {
+        slice_from_s(z, 2, s_31);
+    } else if (among_var == 4) {
+        slice_from_s(z, 2, s_32);
+    } else if (among_var == 5) {
+        slice_del(z);
+    } else if (among_var == 6) {
+        if (!r_R2(z)) return 0;
+        slice_del(z);
+    }
+    return 1;
+}
+
+/*
+ * Step 4: look the ending up in table a_6 (backward search); it must lie
+ * in R2.  Result 1 deletes the slice.  Result 2 ("ion") deletes it only
+ * when the symbol before the cursor is 's' or 't'.
+ *
+ * Returns 0 on no match, when R2 fails, or when that check fails;
+ * 1 otherwise.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [ */
+    among_var = find_among_b(z, a_6, 18);
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ] */
+    if (!r_R2(z)) return 0;
+
+    if (among_var == 2) {
+        int from_end = z->l - z->c;
+        if (!eq_s_b(z, 1, s_33)) {
+            /* not 's': rewind and try 't' */
+            z->c = z->l - from_end;
+            if (!eq_s_b(z, 1, s_34)) return 0;
+        }
+        slice_del(z);
+    } else if (among_var == 1) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/*
+ * Step 5: look the last symbol up in table a_7 (backward search).
+ *
+ *   result 1 ("e"): delete it if it lies in R2, or if it lies in R1 and
+ *                   r_shortv does NOT hold before it.
+ *   result 2 ("l"): delete it if it lies in R2 and is preceded by 'l'.
+ *
+ * Returns 0 on no match or when the conditions are not met, 1 otherwise.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [ */
+    among_var = find_among_b(z, a_7, 2);
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ] */
+
+    if (among_var == 1) {
+        int outer_mark = z->l - z->c;
+        if (!r_R2(z)) {
+            int inner_mark;
+
+            z->c = z->l - outer_mark;
+            if (!r_R1(z)) return 0;
+            inner_mark = z->l - z->c;
+            if (r_shortv(z)) return 0; /* "not shortv" */
+            z->c = z->l - inner_mark;
+        }
+        slice_del(z);
+    } else if (among_var == 2) {
+        if (!r_R2(z)) return 0;
+        if (!eq_s_b(z, 1, s_35)) return 0;
+        slice_del(z);
+    }
+    return 1;
+}
+
+/*
+ * exception2: 1 when table a_8 matches backwards from the cursor and the
+ * match reaches all the way to the backward limit z->lb; otherwise 0.
+ */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [ */
+    if (find_among_b(z, a_8, 8) == 0) return 0;
+    z->bra = z->c; /* ] */
+    return z->c <= z->lb; /* atlimit */
+}
+
+/*
+ * exception1: whole-word exceptions.  Table a_9 is searched forwards from
+ * the cursor and the match must end exactly at the limit z->l.  Results
+ * 1..11 replace the word with a fixed string; any other non-zero result
+ * (the -1 entries) accepts the word unchanged.
+ *
+ * Returns 0 on no match or when the match stops short of the limit,
+ * 1 otherwise.
+ */
+static int r_exception1(struct SN_env * z) {
+    /* replacement for result k is repl[k - 1], of length repl_size[k - 1] */
+    static symbol * const repl[11] = {
+        s_36, s_37, s_38, s_39, s_40, s_41, s_42, s_43, s_44, s_45, s_46
+    };
+    static const int repl_size[11] = {
+        3, 3, 3, 3, 3, 3, 5, 4, 5, 4, 5
+    };
+    int among_var;
+
+    z->bra = z->c; /* [ */
+    among_var = find_among(z, a_9, 18);
+    if (among_var == 0) return 0;
+    z->ket = z->c; /* ] */
+    if (z->c < z->l) return 0; /* atlimit */
+
+    if (among_var >= 1 && among_var <= 11)
+        slice_from_s(z, repl_size[among_var - 1], repl[among_var - 1]);
+    return 1;
+}
+
+/*
+ * postlude: undo the marking done by r_prelude.
+ *
+ * Returns 0 at once when Y_found (z->B[0]) is not set.  Otherwise scans
+ * forward repeatedly for 'Y' and replaces each one by 'y'; when the limit
+ * is reached without a further 'Y' the cursor is put back to where the
+ * last scan started and 1 is returned.
+ */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0; /* no more 'Y' */
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+/*
+ * Entry point: stem the current string of z in place.
+ *
+ *  1. r_exception1: if the whole word is a listed exception it is handled
+ *     there and the function returns 1 straight away.
+ *  2. Length test: if fewer than 3 symbols lie between the cursor and the
+ *     limit, returns 0 and leaves the word untouched.
+ *  3. r_prelude and r_mark_regions run forwards, each with the cursor
+ *     restored afterwards.
+ *  4. Switch to backward mode (lb = cursor, cursor = limit) and run
+ *     Step_1a; then, unless r_exception2 matches, Steps 1b, 1c, 2, 3, 4
+ *     and 5.  Each step is a "do": its return value is ignored and the
+ *     cursor is restored after it.
+ *  5. Back in forward mode (cursor = lb), r_postlude runs.
+ *
+ * Returns 1 in every case except the early 0 from the length test.
+ */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            /* not an exception2 word: run the remaining steps */
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        /* leave backward mode */
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/*
+ * Create an environment sized for this stemmer: no string variables, two
+ * integers (the marks I[0] and I[1]) and one boolean (B[0], Y_found).
+ */
+extern struct SN_env * english_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 1);
+
+    return env;
+}
+
+/* Dispose of an environment obtained from english_create_env */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Stem the current string of z in place; the result is an int status */
+int english_stem(struct SN_env * z);
+
+/* Allocate / release the environment used by english_stem */
+struct SN_env * english_create_env(void);
+void english_close_env(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#ifndef SNOWBALL_HEADER_H
+#define SNOWBALL_HEADER_H
+
+/*
+ * Internal interface between the generated stemmers and the Snowball
+ * runtime: string-header macros, the among-table entry type and the
+ * runtime primitives the generated code calls.
+ */
+
+#include <limits.h>   /* INT_MAX, INT_MIN for MAXINT / MININT below */
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Size in bytes of the two-int header kept in front of a symbol string.
+   Parenthesised so it expands safely inside any expression. */
+#define HEAD (2*sizeof(int))
+
+/* Access to the two header ints stored immediately before p */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    ((int *)(p))[-2]
+
+/* One entry of a table searched by find_among / find_among_b */
+struct among
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    int (* function)(struct SN_env *); /* optional routine; 0 in the tables when unused */
+};
+
+/* Allocation / release of symbol strings */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+/* Grouping tests at the cursor; the _b variants are the ones used by the
+   generated code while working backwards */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+/* Range tests at the cursor */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+/* Literal / variable comparison at the cursor */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+/* Table lookup; the value returned is tested for zero and otherwise used
+   as a switch selector by the generated code */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+/* Editing of the current string and of the slice between bra and ket */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+extern void debug(struct SN_env * z, int number, int line_count);
+
+#endif /* SNOWBALL_HEADER_H */
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point of the stemmer. */
+extern int russian_stem(struct SN_env * z);
+/* Forward declarations of the routines defined further down in this file;
+   each returns 1 on success and 0 on failure. */
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Creation and destruction of the environment used by russian_stem(). */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Suffix table a_0, searched backwards by r_perfective_gerund().
+   NOTE(review): the byte values look like KOI8-R lowercase Cyrillic - confirm
+   against the Snowball Russian stemmer definition. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+/* Each entry is { s_size, s, substring_i, result, function }: substring_i is
+   the index of the next shorter candidate (-1 for none) and result selects
+   the switch case in r_perfective_gerund(). */
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Suffix table a_1, searched backwards by r_adjective(); every entry has
+   result 1, i.e. a match is simply deleted. */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+/* Entries are { s_size, s, substring_i, result, function }. */
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Suffix table a_2, searched backwards by r_adjectival() after an adjective
+   ending has been removed. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+/* Entries are { s_size, s, substring_i, result, function }; result 1 means
+   "delete only if preceded by s_2 or s_3", result 2 means "delete". */
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* Suffix table a_3, searched backwards by r_reflexive(). */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+/* Entries are { s_size, s, substring_i, result, function }. */
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Suffix table a_4, searched backwards by r_verb(). */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+/* Entries are { s_size, s, substring_i, result, function }; result 1 means
+   "delete only if preceded by s_4 or s_5", result 2 means "delete". */
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Suffix table a_5, searched backwards by r_noun(); every entry has
+   result 1, i.e. a match is simply deleted. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+/* Entries are { s_size, s, substring_i, result, function }. */
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* Suffix table a_6, searched backwards by r_derivational(). */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+/* Entries are { s_size, s, substring_i, result, function }. */
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Suffix table a_7, searched backwards by r_tidy_up(). */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+/* Entries are { s_size, s, substring_i, result, function }; the result
+   (1, 2 or 3) selects the switch case in r_tidy_up(). */
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Membership bitmap handed to in_grouping()/out_grouping() together with the
+   range 192..220: bit (ch - 192) is set when symbol ch belongs to the group.
+   NOTE(review): presumably the Russian vowels - confirm. */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* One-symbol literals compared with eq_s_b():
+   s_0/s_1 in r_perfective_gerund(), s_2/s_3 in r_adjectival(),
+   s_4/s_5 in r_verb(), s_6..s_8 in r_tidy_up(), s_9 in russian_stem(). */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/*
+ * Compute the two region marks and leave the cursor where it was.
+ *   z->I[0] (pV): position just after the first g_v member;
+ *   z->I[1] (p2): position reached after then skipping past a non-member,
+ *                 a member and another non-member.
+ * Both marks stay at z->l when the text runs out first. Always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    int saved_c = z->c; /* do, line 100 */
+
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+
+    /* gopast, line 101 */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    z->I[0] = z->c; /* setmark pV, line 101 */
+
+    /* gopast, line 101 */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+
+    /* gopast, line 102 */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+
+    /* gopast, line 102 */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    z->I[1] = z->c; /* setmark p2, line 102 */
+
+restore:
+    z->c = saved_c;
+    return 1;
+}
+
+/* Succeeds (returns 1) when the cursor is at or beyond the mark z->I[1]. */
+static int r_R2(struct SN_env * z) {
+    return z->I[1] <= z->c;
+}
+
+/*
+ * Remove an ending listed in a_0. Endings with result 1 are removed only
+ * when preceded by s_0 or s_1; endings with result 2 are removed
+ * unconditionally. Returns 1 if something was deleted, 0 otherwise.
+ */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 111 */
+    among_var = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 111 */
+
+    if (among_var == 1) {
+        int m = z->l - z->c; /* or, line 115 */
+        if (!eq_s_b(z, 1, s_0)) {
+            z->c = z->l - m;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z); /* delete, line 115 */
+    } else if (among_var == 2) {
+        slice_del(z); /* delete, line 122 */
+    }
+    return 1;
+}
+
+/* Delete an ending listed in a_1; returns 0 when none is present. */
+static int r_adjective(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 127 */
+    among_var = find_among_b(z, a_1, 26); /* substring, line 127 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 127 */
+
+    if (among_var == 1) slice_del(z); /* delete, line 136 */
+    return 1;
+}
+
+/*
+ * Remove an adjective ending (r_adjective) and then, optionally, one of the
+ * endings in a_2 that precedes it. Fails (returns 0) only when r_adjective()
+ * fails; the second removal is a "try" and never makes the routine fail.
+ */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m; goto lab0; }
+            case 1:
+                /* result 1: delete only when preceded by s_2 or s_3 */
+                {   int m = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m;
+                    /* NOTE(review): this inner m shadows the try-level m, so
+                       the restore below uses the position after the a_2
+                       match, not the position before it. */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Delete an ending listed in a_3; returns 0 when none is present. */
+static int r_reflexive(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 168 */
+    among_var = find_among_b(z, a_3, 2); /* substring, line 168 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 168 */
+
+    if (among_var == 1) slice_del(z); /* delete, line 171 */
+    return 1;
+}
+
+/*
+ * Remove an ending listed in a_4. Endings with result 1 are removed only
+ * when preceded by s_4 or s_5; endings with result 2 are removed
+ * unconditionally. Returns 1 if something was deleted, 0 otherwise.
+ */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 176 */
+    among_var = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 176 */
+
+    if (among_var == 1) {
+        int m = z->l - z->c; /* or, line 182 */
+        if (!eq_s_b(z, 1, s_4)) {
+            z->c = z->l - m;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z); /* delete, line 182 */
+    } else if (among_var == 2) {
+        slice_del(z); /* delete, line 190 */
+    }
+    return 1;
+}
+
+/* Delete an ending listed in a_5; returns 0 when none is present. */
+static int r_noun(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 199 */
+    among_var = find_among_b(z, a_5, 36); /* substring, line 199 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 199 */
+
+    if (among_var == 1) slice_del(z); /* delete, line 206 */
+    return 1;
+}
+
+/* Delete an ending listed in a_6, provided r_R2() succeeds at the position
+   reached after the match. Returns 0 when nothing was deleted. */
+static int r_derivational(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 215 */
+    among_var = find_among_b(z, a_6, 2); /* substring, line 215 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 215 */
+
+    if (!r_R2(z)) return 0; /* call R2, line 215 */
+
+    if (among_var == 1) slice_del(z); /* delete, line 218 */
+    return 1;
+}
+
+/*
+ * Final clean-up driven by table a_7:
+ *   result 1 - delete the ending, then require two further s_6/s_7 symbols
+ *              and delete the second one examined;
+ *   result 2 - delete the ending only when it is preceded by s_8;
+ *   result 3 - delete the ending.
+ * Returns 0 as soon as one of the required matches fails.
+ */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 223 */
+    among_var = find_among_b(z, a_7, 4); /* substring, line 223 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 223 */
+
+    if (among_var == 1) {
+        slice_del(z); /* delete, line 227 */
+        z->ket = z->c; /* [, line 228 */
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c; /* ], line 228 */
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z); /* delete, line 228 */
+    } else if (among_var == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z); /* delete, line 231 */
+    } else if (among_var == 3) {
+        slice_del(z); /* delete, line 233 */
+    }
+    return 1;
+}
+
+/*
+ * Stem the word held in z in place.
+ *
+ * Steps, in order:
+ *   1. compute the marks z->I[0] / z->I[1] (r_mark_regions);
+ *   2. switch to backward processing with the backward limit raised to
+ *      z->I[0], so nothing before that mark is touched;
+ *   3. remove a perfective gerund ending, or else optionally a reflexive
+ *      ending followed by an adjectival, verb or noun ending;
+ *   4. optionally remove a trailing s_9 symbol;
+ *   5. run r_derivational() and r_tidy_up();
+ *   6. restore the backward limit and leave the cursor at it.
+ *
+ * Returns 0 only when the cursor lies before z->I[0] at step 2, otherwise 1.
+ */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        /* m3 keeps the previous backward limit until the end of the block */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                /* first of adjectival / verb / noun that succeeds wins */
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            /* "do" never fails: put the cursor back relative to the end */
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3;
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create the environment used by russian_stem(); the stemmer itself only
+   touches the integer slots z->I[0] and z->I[1]. */
+extern struct SN_env * russian_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 0);
+    return env;
+}
+
+/* Dispose of an environment obtained from russian_create_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create / destroy the environment passed to russian_stem(). */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Stem the word held in z in place. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* "unless (cond) stmt" runs stmt when cond is false. */
+#define unless(C) if(!(C))
+
+/* Capacity, in symbols, of a buffer freshly returned by create_s(). */
+#define CREATE_SIZE 1
+
+/*
+ * Allocate a new symbol buffer with capacity and size CREATE_SIZE.
+ * The returned pointer addresses the first symbol; the bookkeeping header
+ * (HEAD bytes) sits immediately in front of it.
+ *
+ * Returns NULL when the allocation fails; previously the header was written
+ * through an invalid pointer in that case.
+ */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL) return NULL;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/*
+ * Release a buffer obtained from create_s() or increase_size().
+ * A NULL argument is ignored, mirroring free(NULL); previously it led to
+ * freeing the invalid address NULL - HEAD.
+ */
+extern void lose_s(symbol * p)
+{   if (p == NULL) return;
+    free((char *) p - HEAD);
+}
+
+/*
+ * Forward test: consume the symbol at the cursor if it is a member of the
+ * group described by bitmap s (bit ch - min set; only min..max can match).
+ * Returns 1 and advances z->c on success, 0 with the cursor untouched
+ * otherwise.
+ */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;         /* at the forward limit */
+
+    ch = z->p[z->c];
+    if (ch > max) return 0;
+    ch -= min;                          /* rebase so that min maps to bit 0 */
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward test: consume the symbol before the cursor if it is a member of
+ * the group described by bitmap s. Returns 1 and decrements z->c on
+ * success, 0 with the cursor untouched otherwise.
+ */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;        /* at the backward limit */
+
+    ch = z->p[z->c - 1];
+    if (ch > max) return 0;
+    ch -= min;                          /* rebase so that min maps to bit 0 */
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+
+    z->c--;
+    return 1;
+}
+
+/*
+ * Forward test: consume the symbol at the cursor if it is NOT a member of
+ * the group described by bitmap s (symbols outside min..max never are).
+ * Returns 1 and advances z->c on success, 0 with the cursor untouched
+ * otherwise.
+ */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;         /* at the forward limit */
+
+    ch = z->p[z->c];
+    if (ch <= max)
+    {
+        ch -= min;                      /* rebase so that min maps to bit 0 */
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward test: consume the symbol before the cursor if it is NOT a member
+ * of the group described by bitmap s. Returns 1 and decrements z->c on
+ * success, 0 with the cursor untouched otherwise.
+ */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;        /* at the backward limit */
+
+    ch = z->p[z->c - 1];
+    if (ch <= max)
+    {
+        ch -= min;                      /* rebase so that min maps to bit 0 */
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+
+    z->c--;
+    return 1;
+}
+
+
+/* Forward test: consume the symbol at the cursor if min <= symbol <= max.
+   Returns 1 and advances z->c on success, 0 otherwise. */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch < min || ch > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: consume the symbol before the cursor if
+   min <= symbol <= max. Returns 1 and decrements z->c on success,
+   0 otherwise. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch < min || ch > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: consume the symbol at the cursor if it lies outside
+   min..max. Returns 1 and advances z->c on success, 0 otherwise. */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (min <= ch && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: consume the symbol before the cursor if it lies outside
+   min..max. Returns 1 and decrements z->c on success, 0 otherwise. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (min <= ch && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward literal match: if the s_size symbols at the cursor equal s,
+   advance the cursor past them and return 1; otherwise return 0. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    /* not enough text left before the forward limit */
+    if (z->l - z->c < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward literal match: if the s_size symbols ending at the cursor equal
+   s, move the cursor back over them and return 1; otherwise return 0. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    /* not enough text left before the backward limit */
+    if (z->c - z->lb < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward match against the whole buffer p (length taken from SIZE(p)). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int p_size = SIZE(p);
+    return eq_s(z, p_size, p);
+}
+
+/* Backward match against the whole buffer p (length taken from SIZE(p)). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int p_size = SIZE(p);
+    return eq_s_b(z, p_size, p);
+}
+
+/*
+ * Look up the text starting at the cursor in the table v[0 .. v_size-1].
+ *
+ * A binary search narrows the table down to entry i; common_i / common_j
+ * record how many leading symbols are already known to match at the two
+ * ends of the search interval, so each comparison can resume from the
+ * smaller of the two. Afterwards the substring_i chain is followed from
+ * entry i until an entry is found whose whole string matched and whose
+ * optional function (if any) succeeds.
+ *
+ * On success the cursor is placed just after the matched string and the
+ * entry's result is returned; 0 is returned when no entry matches.
+ * NOTE(review): the binary search presumably relies on v being sorted -
+ * confirm against the table generator.
+ */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    /* walk from the candidate to ever shorter candidates */
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/*
+ * Backward counterpart of find_among(): the text ending at the cursor is
+ * compared with each table string from its last symbol towards its first,
+ * never reading before z->lb. On success the cursor is moved back to the
+ * start of the matched string and the entry's result is returned; 0 is
+ * returned when no entry matches.
+ */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            /* one extra round so that v[0] itself gets inspected */
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    /* walk from the candidate to ever shorter candidates */
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/*
+ * Replace buffer p by a larger one with room for n + 20 symbols, copying
+ * the CAPACITY(p) symbols of the old buffer and releasing it. The size
+ * field of the new buffer is left for the caller to set.
+ *
+ * Returns the new buffer, or NULL when the allocation fails (the old buffer
+ * is released in that case too); previously a failed allocation was written
+ * through unchecked.
+ */
+extern symbol * increase_size(symbol * p, int n)
+{   int new_size = n + 20;
+    symbol * q;
+    void * mem = malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    if (mem == NULL) { lose_s(p); return NULL; }
+    q = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(q) = new_size;
+    memmove(q, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return q;
+}
+
+/* to replace symbols between c_bra and c_ket in z->p by the
+   s_size symbols at s
+*/
+
+/*
+ * Replace the symbols z->p[c_bra .. c_ket-1] by the s_size symbols at s.
+ * The buffer is grown when needed, the tail is shifted, and z->l and the
+ * cursor are adjusted: a cursor at or after c_ket moves with the tail, a
+ * cursor inside the replaced span is pulled back to c_bra.
+ * Returns the change in length (new length minus old length).
+ */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{
+    int old_len = SIZE(z->p);
+    int delta = s_size - (c_ket - c_bra);
+
+    if (delta != 0)
+    {
+        int new_len = delta + old_len;
+
+        if (new_len > CAPACITY(z->p))
+            z->p = increase_size(z->p, new_len);
+
+        /* slide everything from c_ket onwards to its new position */
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+
+        if (z->c >= c_ket)
+            z->c += delta;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+
+    if (s_size != 0)
+        memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+
+    return delta;
+}
+
+/*
+ * Verify 0 <= bra <= ket <= l <= SIZE(p). On violation a message is written
+ * to stderr, the buffer is dumped with debug(), and the process exits with
+ * status 1.
+ */
+static void slice_check(struct SN_env * z)
+{
+    int valid = z->bra >= 0 &&
+                z->bra <= z->ket &&
+                z->ket <= z->l &&
+                z->l <= SIZE(z->p);   /* this last test could be removed */
+
+    if (valid) return;
+
+    fprintf(stderr, "faulty slice operation:\n");
+    debug(z, -1, 0);
+    exit(1);
+}
+
+/* Replace the current slice z->bra .. z->ket by the s_size symbols at s,
+   after validating the slice bounds. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the contents of buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int p_size = SIZE(p);
+    slice_from_s(z, p_size, p);
+}
+
+/* Delete the current slice, i.e. replace it by an empty string. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, 0);
+}
+
+/* Replace bra..ket by the s_size symbols at s and shift z->bra / z->ket by
+   the resulting length change when they lie at or after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* Same as insert_s(), with the text and its length taken from buffer p. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice into p, growing p when it is too small, and
+   return the (possibly reallocated) buffer. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int n;
+
+    slice_check(z);
+    n = z->ket - z->bra;
+    if (n > CAPACITY(p)) p = increase_size(p, n);
+    memmove(p, z->p + z->bra, n * sizeof(symbol));
+    SET_SIZE(p, n);
+    return p;
+}
+
+/* Copy the first z->l symbols of the text into p, growing p when it is too
+   small, and return the (possibly reallocated) buffer. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int n = z->l;
+
+    if (n > CAPACITY(p)) p = increase_size(p, n);
+    memmove(p, z->p, n * sizeof(symbol));
+    SET_SIZE(p, n);
+    return p;
+}
+
+/*
+ * Dump the buffer to stdout with position markers: '{' at lb, '[' at bra,
+ * '|' at the cursor, ']' at ket and '}' at l; zero symbols print as '#'.
+ * The "number (line n): [size]'" prefix is printed only when number >= 0.
+ */
+extern void debug(struct SN_env * z, int number, int line_count)
+{
+    int size = SIZE(z->p);
+    int pos;
+
+    if (number >= 0)
+        printf("%3d (line %4d): [%d]'", number, line_count,size);
+
+    for (pos = 0; pos <= size; pos++)
+    {
+        if (z->lb == pos) printf("{");
+        if (z->bra == pos) printf("[");
+        if (z->c == pos) printf("|");
+        if (z->ket == pos) printf("]");
+        if (z->l == pos) printf("}");
+
+        if (pos < size)
+        {
+            int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-cases the NUL-terminated string str in place, byte by byte, using
+ * tolower(), and returns str.
+ */
+char*
+lowerstr(char *str) {
+   unsigned char *cur;
+
+   /* walk the bytes as unsigned char so tolower() never sees a negative value */
+   for(cur=(unsigned char*)str; *cur; cur++)
+       *cur = tolower(*cur);
+   return str;
+}
+
+/*
+ * Releases every word of the stop list and the pointer array itself, then
+ * zeroes the whole StopList structure (including its wordop pointer).
+ *
+ * The array is freed exactly once, after all of its s->len entries have
+ * been released; it is also freed when it is allocated but holds no words.
+ */
+void
+freestoplist(StopList *s) {
+   if ( s->stop ) {
+       int i;
+
+       for(i=0; i<s->len; i++)
+           if ( s->stop[i] )
+               free(s->stop[i]);
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Loads a stop-word file into *s.
+ *
+ * in  - text value holding the file name; NULL or empty means "no file",
+ *       in which case s ends up with len 0 and stop NULL.
+ * s   - list to fill; s->wordop, when set, is applied to every word read.
+ *
+ * The file is read line by line (at most STOPBUFLEN-1 bytes per fgets call);
+ * empty lines are skipped.  Words are stored in malloc'ed memory.  Reports
+ * ERROR through elog when the file cannot be opened or memory runs out.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           int blen=strlen(buf);
+
+           /*
+            * Strip the line terminator only when there is one: the last
+            * line of a file may lack it, and must not lose its final letter.
+            */
+           if ( blen>0 && buf[blen-1]=='\n' )
+               buf[blen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /* the words read so far live only in the local array */
+                   while( s->len>0 )
+                       free(stop[--(s->len)]);
+                   free(stop);
+                   s->stop=NULL;
+                   freestoplist(s);
+                   fclose(hin); 
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+    
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               /* the words read so far live only in the local array */
+               while( s->len>0 )
+                   free(stop[--(s->len)]);
+               free(stop);
+               s->stop=NULL;
+               freestoplist(s);
+               fclose(hin); 
+               elog(ERROR,"Not enough memory");
+           }
+           if ( s->wordop ) 
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++; 
+       }
+       fclose(hin);
+       pfree(filename); 
+   }
+   s->stop=stop;
+} 
+
+/* qsort/bsearch comparator: a and b point at char* elements, which are
+ * ordered with strcmp. */
+static int
+comparestr(const void *a, const void *b) {
+   const char *left  = *(char**)a;
+   const char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/* Sorts the words of s in strcmp order; does nothing for an empty list. */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Returns true when key is found in the stop list by binary search.  When
+ * s->wordop is set it is applied to key first.  The search uses strcmp
+ * ordering, the same comparator sortstoplist uses.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop ) 
+       key=(*(s->wordop))(key);
+   if ( !s->stop || s->len<=0 )
+       return false;
+   if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
+       return true;
+   return false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fills *cfg for the configuration whose pg_ts_cfg row has oid `id`.
+ *
+ * Step 1: fetch the parser name (prs_name) of the configuration.
+ * Step 2: fetch (token id, dictionary name array) pairs from pg_ts_cfgmap.
+ *         Rows arrive ordered by token id descending, so the first row
+ *         carries the largest id and is used to size cfg->map.
+ * Step 3: after SPI_finish(), turn the parser name and every dictionary
+ *         name into ids with name2id_prs()/name2id_dict().
+ *
+ * cfg->map and the dict_id arrays are malloc'ed.  Names that must outlive
+ * SPI_finish() are copied into TopMemoryContext and pfree'd once resolved.
+ * Failures are reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       /* keep a copy that survives SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second plan takes (parser name, cfg oid) */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       /* isnull below refers to the dictionary array (column 2) */
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       if ( !cfg->map ) {
+           /* first row has the highest token id: size the map from it */
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* stash copies of the dictionary names; resolved to ids below */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       /* free the detoasted copy, if one was made */
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Process-local cache of configurations that have already been loaded.
+ */
+typedef struct {
+   /* entry returned by the most recent findcfg() call, checked first */
+   TSCfgInfo   *last_cfg;
+   /* number of entries in use in list */
+   int     len;
+   /* number of entries allocated for list */
+   int     reallen;
+   /* malloc'ed array, kept sorted by id so it can be bsearch'ed */
+   TSCfgInfo   *list;
+   /* configuration name -> oid lookups cached by name2id_cfg() */
+   SNMap       name2id_map;
+} CFGList;
+
+/* the single cache instance, initially empty */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drops the whole configuration cache: the name->oid map, every cached
+ * configuration's token map and dictionary id arrays, and the list itself.
+ * CList is left zeroed, i.e. in its initial empty state.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort/bsearch comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is unsigned,
+ * so a difference squeezed into an int can come out with the wrong sign for
+ * ids that are far apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Returns the cached TSCfgInfo for configuration `id`, loading it with
+ * init_cfg() on first use.  The returned pointer refers to an element of
+ * CList.list and may be invalidated by a later call that grows the list.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo key;
+   TSCfgInfo *slot;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   key.id=id;
+   if ( CList.len != 0 ) {
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /*
+    * last chance: load it.  Nothing is published in last_cfg until the
+    * entry is complete, because realloc() may move the list and init_cfg()
+    * may leave through an error with the new slot only partly filled.
+    */
+   CList.last_cfg = NULL;
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+   slot=&(CList.list[CList.len]);
+   init_cfg(id, slot);
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+
+   /* qsort changed order, so look the new entry up again */
+   CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return CList.last_cfg;
+}
+
+
+/*
+ * Maps a configuration name (pg_ts_cfg.ts_name) to the oid of its row.
+ * Results are remembered in CList.name2id_map, so the query runs only for
+ * names not seen before.  Raises ERROR via elog when the name is unknown or
+ * the oid comes back null.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   /* cache lookup first; a zero result is treated as "not cached" */
+   Oid id=findSNMap_t( &(CList.name2id_map), name );
+   
+   if ( id ) 
+       return id;
+   
+   SPI_connect();
+   /* the plan is prepared once and saved for later calls */
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* fetch at most one row */
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull ) 
+           elog(ERROR, "Null id for tsearch config");
+   } else 
+       elog(ERROR, "No tsearch config");
+   SPI_finish();
+   /* remember the answer for next time */
+   addSNMap_t( &(CList.name2id_map), name, id );
+   return id;
+}
+
+
+/*
+ * Splits buf (buflen bytes) into lexemes with the parser of cfg, normalizes
+ * each lexeme with the dictionaries mapped to its token type, and appends
+ * the resulting words to prs.
+ *
+ * For every token the dictionaries are tried in order; the first one that
+ * returns a non-NULL array wins, even if that array is empty.  Each token
+ * recognized this way advances prs->pos by one, and all words produced
+ * for it share that position.  The strings returned by the dictionary are
+ * stored in prs->words as they are (not copied); only the array that held
+ * them is pfree'd.
+ *
+ * Raises ERROR when a lexeme is MAXSTRLEN bytes or longer.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* a token type of 0 marks the end of the input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Appends one token to prs->words: a zeroed HLWORD carrying the token type
+ * and a palloc'ed copy of the buflen bytes at buf.  The array is doubled
+ * as often as needed to make room.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->lenwords <= prs->curwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   slot = prs->words + prs->curwords;
+   memset( slot, 0, sizeof(HLWORD) ); 
+   slot->type = (uint8)type;
+   slot->len = buflen; 
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;    
+}
+
+/*
+ * Links the most recently added word of prs (prs->words[curwords-1]) to
+ * the query items whose operand equals the buflen bytes at buf.
+ *
+ * The first matching item is stored in the word itself.  For every further
+ * match a copy of the word is appended with `repeated` set and its own item
+ * pointer.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* make room for the worst case: one extra entry per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc() above may have moved the array,
+    * and it will not be resized again inside the loop below.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Headline variant of text parsing: splits buf (buflen bytes) with the
+ * parser of cfg and records every token in prs through hladdword(),
+ * including tokens whose type has no dictionaries.  Tokens that a
+ * dictionary recognizes are then matched against the query with
+ * hlfinditem(), once per normalized form.
+ *
+ * The dictionaries mapped to a token type are tried in order and the first
+ * one returning a non-NULL array wins.  The normalized strings and the
+ * array holding them are pfree'd here.
+ *
+ * Raises ERROR when a lexeme is MAXSTRLEN bytes or longer.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* a token type of 0 marks the end of the input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Builds the headline text from the words collected in prs.
+ *
+ * A word is emitted only when it is flagged `in` and neither `skip` nor
+ * `repeated`.  A word flagged `replace` is emitted as a single space; any
+ * other word is copied verbatim, wrapped in prs->startsel / prs->stopsel
+ * when it is flagged `selected`.  The word buffers of all non-repeated
+ * entries are pfree'd along the way.  Returns a palloc'ed text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int     alloclen=128;
+   text    *out=(text*)palloc( alloclen );
+   char    *cur=((char*)out) + VARHDRSZ;
+   int     idx;
+
+   for(idx=0; idx<prs->curwords; idx++) {
+       HLWORD  *wrd=prs->words + idx;
+
+       /* grow the result until this word plus both markers fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (cur - ((char*)out)) >= alloclen ) {
+           int used = cur - ((char*)out);
+
+           alloclen *= 2;
+           out = (text *) repalloc(out, alloclen);
+           cur = ((char*)out) + used;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace )
+               *cur++ = ' ';
+           else {
+               if (wrd->selected) {
+                   memcpy(cur,prs->startsel,prs->startsellen);
+                   cur += prs->startsellen;
+               }
+               memcpy(cur,wrd->word,wrd->len);
+               cur += wrd->len;
+               if (wrd->selected) {
+                   memcpy(cur,prs->stopsel,prs->stopsellen);
+                   cur += prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries share their buffer with the entry they copy */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+   }
+
+   VARATT_SIZEP(out)=cur - ((char*)out);
+   return out; 
+}
+
+/*
+ * Returns the oid of the current configuration.  When none has been chosen
+ * yet (current_cfg_id is 0), it is looked up in pg_ts_cfg by the current
+ * LC_CTYPE locale name and remembered in current_cfg_id.  Raises ERROR via
+ * elog when no configuration matches the locale.
+ */
+int  
+get_currcfg(void) {
+   Oid arg[1]={ TEXTOID };
+   const char *curlocale;
+   Datum pars[1];
+   bool isnull;
+   int stat;
+
+   /* already known: no query needed */
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   /* the plan is prepared once and saved for later calls */
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* setlocale with a NULL locale only queries the current setting */
+   curlocale = setlocale(LC_CTYPE, NULL);
+   pars[0] = PointerGetDatum( char2text((char*)curlocale) );
+   stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1);
+
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   /* NOTE(review): isnull is filled in but never checked here */
+   if ( SPI_processed > 0 )
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   /* release the text copy of the locale name made above */
+   pfree(DatumGetPointer(pars[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: makes the configuration with the given oid the current one.
+ * findcfg() is called first, so the configuration is loaded before
+ * current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: makes the configuration with the given name the current
+ * one, by resolving the name with name2id_cfg() and calling set_curcfg.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Oid cfgid=name2id_cfg(name);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum( cfgid ) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* SQL-callable: returns the oid reported by get_currcfg(). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=get_currcfg();
+
+   PG_RETURN_OID( cfgid );
+}
+
+/*
+ * SQL-callable: emits the notice "TSearch cache cleaned" through ts_error().
+ * NOTE(review): no reset_cfg() or similar call is made here; presumably
+ * ts_error() itself clears the caches before reporting - confirm against
+ * its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries assigned to one token type: dict_id holds len entries. */
+typedef struct {
+   int len;
+   Datum   *dict_id;
+} ListDictionary;
+
+/*
+ * One loaded configuration: its own oid, the id of its parser, and a map
+ * with len slots that is indexed by token type.
+ */
+typedef struct {
+   Oid id;
+   Oid prs_id;
+   int len;
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalized word produced by parsetext_v2: its text, length and position. */
+typedef struct {
+        uint16          len;
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        char       *word;
+   uint32  alen;
+}       WORD;
+   
+/*
+ * Growable array of WORDs: lenwords entries allocated, curwords in use.
+ * pos counts the tokens recognized so far.
+ */
+typedef struct {
+        WORD       *words;
+        int4            lenwords;
+        int4            curwords;
+   int4        pos;
+}       PRSTEXT;
+
+/*
+ * One token of a text being prepared for a headline.  As used by genhl():
+ * a word is output when `in` is set and `skip` and `repeated` are not;
+ * `replace` outputs a single space instead of the word; `selected` wraps
+ * the word in the start/stop markers.  `item` is the matching query item,
+ * if any.
+ */
+typedef struct {
+        uint16    len;
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   uint8   type;
+        char      *word;
+   ITEM      *item;
+}       HLWORD;
+   
+/*
+ * Growable array of HLWORDs (lenwords allocated, curwords in use) plus the
+ * marker strings, with their lengths, placed around selected words.
+ */
+typedef struct {
+        HLWORD       *words;
+        int4            lenwords;
+        int4            curwords;
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+
+/*
+ * Input function for the tsstat type.  The argument is never examined: the
+ * result is always a freshly palloc'ed header describing an empty
+ * statistics set (len = STATHDRSIZE, size = 0).
+ */
+Datum
+tsstat_in(PG_FUNCTION_ARGS)
+{
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+
+/*
+ * Output function for the tsstat type.  There is no text representation:
+ * every call reports the error "Unimplemented".
+ */
+Datum
+tsstat_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR,"Unimplemented");
+
+   /* only reached if elog() returns; gives the function a defined result */
+   PG_RETURN_NULL();
+}
+
+/*
+ * Grow (or create) an array of WordEntry pointers.
+ *
+ * in   - current array, or NULL if nothing has been allocated yet
+ * len  - in/out: current capacity; set to 8 on first allocation,
+ *        doubled on every later call
+ *
+ * Returns the (possibly moved) array.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len )
+{
+   if ( in!=NULL && *len!=0 ) {
+       /* existing array: double its capacity */
+       *len = (*len) * 2;
+       return repalloc( in, sizeof(WordEntry*)* (*len) );
+   }
+
+   /* first use: start with room for eight pointers */
+   *len = 8;
+   return palloc( sizeof(WordEntry*)* (*len) );
+}
+
+/*
+ * Order a statistics entry `a' (strings stored in `stat') against a
+ * tsvector entry `b' (strings stored in `txt').
+ *
+ * Shorter words sort first; words of equal length are ordered by
+ * strncmp() over that length.  Returns <0, 0 or >0.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt)
+{
+   char *aword;
+   char *bword;
+
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   aword = STATSTRPTR(stat) + a->pos;
+   bword = STRPTR(txt) + b->pos;
+   return strncmp(aword, bword, a->len);
+}
+
+/*
+ * Initialise *dst as the statistics entry for a lexeme seen for the first
+ * time: copy the text of `we' (an entry of `txt') to `curptr' inside the
+ * string area of `newstat', record its offset and length, and start the
+ * counters at one document and POSDATALEN(txt,we) occurrences (at least 1).
+ * newstat->size must already be set, because STATSTRPTR() depends on it.
+ */
+static void
+initStatEntry(StatEntry *dst, tsstat *newstat, char *curptr, tsvector *txt, WordEntry *we) {
+   dst->nentry=POSDATALEN(txt,we);
+   if ( dst->nentry==0 )
+       dst->nentry=1;
+   dst->ndoc=1;
+   dst->len=we->len;
+   memcpy(curptr, STRPTR(txt) + we->pos, dst->len);
+   dst->pos = curptr - STATSTRPTR(newstat);
+}
+
+/*
+ * Build a new tsstat holding every entry of `stat' plus one new entry for
+ * each of the `len' lexemes of `txt' pointed to by entry[].
+ *
+ * The old string area is copied unchanged to the start of the new one, so
+ * the `pos' offsets of existing entries stay valid; the new words are
+ * appended after it.  With exactly one new word its slot is found by binary
+ * search; otherwise the two entry lists are merged.
+ * NOTE(review): the merge assumes entry[] is ordered the way
+ * compareStatWord() orders words - confirm that tsvector entries are
+ * stored in that order.
+ *
+ * Returns the palloc'ed result; `stat' itself is neither modified nor freed.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* bytes needed for the text of the new words */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings first, new strings are appended starting at curptr */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single insertion: binary search for the slot, copy around it */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr;
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       initStatEntry(nptr, newstat, curptr, txt, *ptr);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* merge old entries with the new words */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               initStatEntry(nptr, newstat, curptr, txt, *ptr);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* at most one of the two tails below is non-empty */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) );
+
+       while(ptr-entry<len) {
+           initStatEntry(nptr, newstat, curptr, txt, *ptr);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+
+/*
+ * ts_accum(tsstat, tsvector) - fold one tsvector into the running
+ * statistics.
+ *
+ * For every word of the tsvector that is already present in `stat' the
+ * entry is updated in place (ndoc+1, nentry + number of positions, at
+ * least 1).  Words not yet present are collected and, if there are any,
+ * a new tsstat containing them is built with formstat() and returned;
+ * otherwise `stat' itself is returned.  A NULL first argument is treated
+ * as an empty statistics set; a NULL or empty tsvector leaves the
+ * statistics unchanged.
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt;
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /*
+    * The NULL test must come before detoasting: detoasting a NULL datum
+    * would dereference a null pointer.
+    */
+   if ( PG_ARGISNULL(1) || PG_GETARG_POINTER(1)==NULL )
+       PG_RETURN_POINTER(stat);
+
+   txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+
+   /* simple check of correctness */
+   if ( txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   if ( stat->size < 100*txt->size ) { /* merge */
+       /* walk both lists in parallel */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* remaining words were not matched by any existing entry */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       /* statistics much larger than the document: binary search each word */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions: a private copy of the
+ * statistics being returned and the index of the next entry to emit.
+ * `stat' always holds a tsstat (it is filled by copying one and is read
+ * through STATPTR()/STATSTRPTR()), so it is declared with that type.
+ */
+typedef struct {
+   uint32  cur;
+   tsstat  *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions.
+ *
+ * Working in the multi-call memory context, copies `stat' into a new
+ * StatStorage (cursor at 0), stores it in funcctx->user_fctx, and prepares
+ * the slot and attribute metadata for the "statinfo" row type.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat)
+{
+   MemoryContext   saved;
+   StatStorage     *storage;
+   TupleDesc       desc;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* private copy of the statistics */
+   storage = palloc( sizeof(StatStorage) );
+   storage->stat = palloc( stat->len );
+   memcpy(storage->stat, stat, stat->len);
+   storage->cur = 0;
+   funcctx->user_fctx = (void*)storage;
+
+   /* result row description */
+   desc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+
+/*
+ * Produce the next result row of a set-returning call.
+ *
+ * Returns a tuple Datum built from the entry at the current cursor position
+ * (word, ndoc, nentry) and advances the cursor.  Once all entries have been
+ * returned, frees the stored statistics and returns (Datum)0.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx)
+{
+   StatStorage *storage=(StatStorage*)funcctx->user_fctx;
+   StatEntry   *entry;
+   HeapTuple   tuple;
+   Datum       result;
+   char        ndocbuf[16];
+   char        nentrybuf[16];
+   char        *values[3];
+
+   if ( storage->cur >= storage->stat->size ) {
+       /* exhausted: release the private copy */
+       pfree(storage->stat);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   entry = STATPTR(storage->stat) + storage->cur;
+
+   /* column 1: the word, as a NUL-terminated copy */
+   values[0] = palloc( entry->len+1 );
+   memcpy( values[0], STATSTRPTR(storage->stat)+entry->pos, entry->len);
+   values[0][entry->len] = '\0';
+
+   /* columns 2 and 3: the counters, in text form */
+   sprintf(ndocbuf,"%d",entry->ndoc);
+   values[1] = ndocbuf;
+   sprintf(nentrybuf,"%d",entry->nentry);
+   values[2] = nentrybuf;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[0]);
+   storage->cur++;
+
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+
+/*
+ * ts_accum_finish(tsstat) - set-returning function that emits one row per
+ * entry of the accumulated statistics passed as argument 0.
+ */
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS)
+{
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+/* Oid of the tsvector type; InvalidOid until get_ti_Oid() has looked it up */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the Oid of the type named 'tsvector' in pg_type through SPI and
+ * store it in tiOid.  Must be called inside an SPI connection.  Raises an
+ * error if the query fails, returns no row, or yields a NULL/invalid Oid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * Test for "no rows" explicitly: a row count cannot be negative, so a
+    * "<0" test never fires and vals[0] would be read from an empty result.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull || tiOid==InvalidOid ) {
+       tiOid = InvalidOid;     /* do not cache a bogus value */
+       elog(ERROR, "tsvector type has InvalidOid");
+   }
+}
+
+/*
+ * Run the SQL query given in `txt' through an SPI cursor and accumulate
+ * word statistics over its result.
+ *
+ * The query must return exactly one column of type tsvector; anything else
+ * raises an error.  Rows are fetched 100 at a time and every non-NULL value
+ * is folded into the statistics with ts_accum().  Must be called inside an
+ * SPI connection.  Returns the accumulated tsstat.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   /* validate the shape of the result before touching any row */
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       /* fold every row of the current batch into the statistics */
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum returns a new object only when words were added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+
+/*
+ * ts_stat(text) - set-returning function.  On the first call the text
+ * argument is executed as an SQL query via ts_stat_sql() inside an SPI
+ * connection and the resulting statistics are stored for the later calls;
+ * every call then returns one row per statistics entry until exhausted.
+ */
+Datum 
+ts_stat(PG_FUNCTION_ARGS)
+{
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *query=PG_GETARG_TEXT_P(0);
+       tsstat  *collected;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       collected = ts_stat_sql(query);
+       PG_FREE_IF_COPY(query,0);
+       /* must copy the result before SPI_finish() is called */
+       ts_setup_firstcall(funcctx, collected );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one distinct word.
+ *   len    - length in bytes of the word text
+ *   pos    - offset of the word text from the start of the string area
+ *            (STATSTRPTR) of the owning tsstat
+ *   ndoc   - number of tsvectors in which the word was seen
+ *   nentry - total occurrences; each tsvector contributes its position
+ *            count for the word, or 1 when it has none
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Accumulated statistics object.
+ *   len  - total size of the object in bytes
+ *   size - number of StatEntry items
+ *   data - `size' StatEntry items immediately followed by the string area
+ *          holding the word texts (see STATPTR / STATSTRPTR)
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat.  Every macro argument is parenthesized so that
+ * expressions may be passed safely, and the header fields are read through
+ * a tsstat pointer, the type declared in this header.
+ */
+/* size of the fixed header (the len and size fields) */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* total bytes needed for x entries plus lenstr bytes of word text */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* first StatEntry of the object */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* start of the string area, right after the last StatEntry */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* number of bytes in the string area */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+-- One row per dictionary.
+--   dict_name       unique name of the dictionary
+--   dict_init       oid (from pg_proc) of the init function, may be null
+--   dict_initoption text option stored for the dictionary, e.g. the path of
+--                   a stop-word file; null when there is none
+--   dict_lexize     oid (from pg_proc) of the lexize function
+--   dict_comment    free-text description
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+-- One row per parser.  Each oid column holds the pg_proc oid of the C
+-- function implementing that step: start, next token, end, headline and
+-- token-type listing.
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+-- One row per search configuration: its name, the name of the parser it
+-- uses (a pg_ts_parser.prs_name value) and an optional locale string.
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+-- Per-configuration mapping from a token-type alias (lword, url, ...) to
+-- the array of dictionary names (pg_ts_dict.dict_name values) applied to
+-- tokens of that type.
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+-- Row type returned by stat(): the word, the number of documents
+-- (tsvectors) containing it, and its total number of occurrences.
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+-- stat(query): the argument is the text of an SQL query that must return
+-- exactly one column of type tsvector; the result is one statinfo row per
+-- distinct word found in the tsvectors that query returns.
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of the position of each lexeme in the string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>              /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * Version-1 call convention registration and prototypes for the
+ * SQL-callable functions defined in this file.
+ */
+
+/* text input / output of the tsvector type */
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+/* text -> tsvector conversion: by config id, current config, config name */
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+/* trigger function that fills a tsvector column */
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+/* number of entries stored in a tsvector */
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending position.
+ *
+ * Returns 0 for equal positions: qsort() requires a consistent ordering,
+ * and reporting "greater" for equal keys in both directions is not one.
+ */
+static int
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ *
+ * When the same position occurs several times the largest weight is kept.
+ * Compaction stops early once MAXNUMPOS-1 distinct follow-up positions
+ * have been stored or the position MAXENTRYPOS-1 has been stored.
+ *
+ * Returns the number of positions left at the front of a[].
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   int4 last = 0;      /* index of the most recently kept element */
+   int4 i;
+
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (i = 1; i < l; i++) {
+       if ( a[i].pos == a[last].pos ) {
+           /* same position again: only remember the heavier weight */
+           if ( a[i].weight > a[last].weight )
+               a[last].weight = a[i].weight;
+           continue;
+       }
+
+       last++;
+       a[last].pos = a[i].pos;
+       a[last].weight = a[i].weight;
+       if ( last >= MAXNUMPOS-1 || a[last].pos == MAXENTRYPOS-1 )
+           break;
+   }
+   return last + 1;
+}
+
+/* String buffer that compareentry() indexes into; set before calling qsort(). */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN items: shorter lexemes sort first,
+ * lexemes of equal length are ordered by strncmp() over BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *ea = (const WordEntryIN *) a;
+   const WordEntryIN *eb = (const WordEntryIN *) b;
+
+   if ( ea->entry.len != eb->entry.len )
+       return ( ea->entry.len > eb->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[ea->entry.pos],
+                  &BufferStr[eb->entry.pos],
+                  ea->entry.len);
+}
+
+/*
+ * Sort the parsed entries, merge duplicate lexemes (concatenating their
+ * position lists) and deduplicate each surviving position list.
+ *
+ * a         - array of l parsed entries.  a[i].pos is only meaningful when
+ *             a[i].entry.haspos is set; its element 0 is then used as a
+ *             uint16 counter followed by that many positions.
+ * buf       - string buffer the entry.pos offsets refer to.
+ * outbuflen - out: bytes the surviving entries need in the final data
+ *             area, i.e. SHORTALIGN'ed lexemes plus, for entries with
+ *             positions, the counter and the positions.
+ *
+ * Returns the number of unique entries left at the front of a[].
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   /*
+    * Always compute the size from scratch.  The caller passes in the size
+    * of its scratch buffer, which must not leak into the result length.
+    */
+   *outbuflen = 0;
+   if (l <= 0)
+       return 0;
+
+   res = a;
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           /* +1 accounts for the leading counter element */
+           *outbuflen += (*(uint16*)(a->pos) + 1) * sizeof(WordEntryPos);
+       }
+       *outbuflen += SHORTALIGN(a->entry.len);
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* new lexeme: finish the previous one and account for its size */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           if ( res != ptr )   /* memcpy() must not be given overlapping areas */
+               memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* duplicate lexeme carrying positions: merge them into res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* res had no positions yet: adopt the duplicate's list */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* finish the last surviving entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/*
+ * States of the token scanner in gettoken_tsvector():
+ *   WAITWORD     - looking for the first character of the next token
+ *   WAITENDWORD  - inside an unquoted word
+ *   WAITNEXTCHAR - previous character was a backslash; take the next
+ *                  character literally and return to the saved state
+ *   WAITENDCMPLX - inside a single-quoted word
+ *   WAITPOSINFO  - just after a closing quote; a ':' starts position info
+ *   INPOSINFO    - a digit starting a position number is required
+ *   WAITPOSDELIM - after the start of a position number: further digits,
+ *                  a weight letter, ',' or the end of the token
+ */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Double the size of state->word (keeping state->curpos at the same
+ * offset) when the write position plus one would reach state->len.
+ * Expects a variable named "state" to be in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Scan the next token from state->prsbuf.
+ *
+ * On success the token text is left NUL-terminated in state->word (its end
+ * is state->curpos) and 1 is returned; 0 is returned when the end of the
+ * input is reached while looking for a token.  Syntax problems are reported
+ * with elog(ERROR).
+ *
+ * A token is either an unquoted word or a single-quoted string; a backslash
+ * makes the following character literal.  Unless state->oprisdelim is set,
+ * a token may be followed by ':' and a comma-separated list of positions,
+ * each optionally followed by a weight letter (a/A or '*', b, c, d).  In
+ * that case state->alen is non-zero on return and state->pos holds the
+ * list, with element 0 used as a uint16 counter.
+ *
+ * When state->oprisdelim is set, ISOPERATOR() characters also end a word
+ * and position info is not parsed.
+ *
+ * Characters handed to the <ctype.h> classifiers are cast to unsigned char:
+ * passing a negative plain char is undefined behaviour.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;   /* state to resume after an escaped character */
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               /* end of word; the delimiter itself is left unconsumed */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* consume the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               /* make room for one more position (element 0 is the counter) */
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi() reads the whole number; its remaining digits are skipped in WAITPOSDELIM */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type.
+ *
+ * Tokens are read with gettoken_tsvector() and collected in arr[], their
+ * text being appended to tmpbuf.  uniqueentry() then sorts and merges them
+ * and reports the size of the data area, after which the final value is
+ * assembled: header, WordEntry array, then for each entry the SHORTALIGN'ed
+ * lexeme followed by its position list (if any).
+ *
+ * Raises ERROR for words of MAXSTRLEN bytes or more and when the collected
+ * text no longer fits the offset range of WordEntry.pos.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array and the text buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /* the offset is stored in a 20-bit field, so MAXSTRPOS itself does not fit */
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           /* take ownership of the position list built by the scanner */
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* empty value: do not carry the scratch size over */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       /* from here on the offset is relative to the value's own data area */
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* counter element plus the positions */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * Return the number of entries (the size field) of the tsvector argument.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the field before the detoasted copy may be released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type.
+ *
+ * Every lexeme is printed in single quotes; entries are separated by one
+ * space.  Quotes and backslashes inside a lexeme are prefixed with a
+ * backslash, matching what gettoken_tsvector() accepts on input, so the
+ * text can be read back.  An entry with positions is followed by ':' and
+ * the comma-separated positions, each optionally followed by its weight
+ * letter (A, B or C; weight 0 prints nothing).
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   /*
+    * Upper bound for the output: two quotes per entry, separators, the
+    * terminator, room to escape every lexeme byte, and 7 bytes per
+    * position (up to 5 digits, a weight letter and a ',' or the ':').
+    */
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += ptr[i].len*2 /*for escape */;
+       if ( ptr[i].haspos )
+           lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           /* space for the escape was reserved above (len*2) */
+           if (*curin == '\'' || *curin == '\\')
+               *curout++ = '\\';
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD items: by length first, then by strncmp()
+ * of the text, then by position.  Returns 0 only when all three are equal,
+ * which keeps the ordering consistent as qsort() requires.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+
+   if (wa->len == wb->len) {
+       int res = strncmp(wa->word, wb->word, wb->len);
+
+       if ( res==0 ) {
+           if ( wa->pos.pos == wb->pos.pos )
+               return 0;
+           return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+       }
+       return res;
+   }
+   return (wa->len > wb->len) ? 1 : -1;
+}
+
+/*
+ * Sort the l words in a[] and merge duplicates in place, turning each
+ * surviving word's single position into a position array.
+ *
+ * After the call every kept word has pos.apos allocated with element 0
+ * holding the number of positions and the positions following it; alen is
+ * the allocated element count.  The text of dropped duplicates is freed.
+ *
+ * Returns the number of unique words left at the front of a[].
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   if (l == 1) {
+       /*
+        * pos.pos is saved before pos.apos is assigned -- presumably both
+        * share storage; NOTE(review): confirm against the WORD declaration.
+        */
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   /* convert the first (smallest) word; later ones are converted in the loop */
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* a new word: move it up next to the kept ones */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* duplicate of res: drop its text, keep its position if there is room */
+           pfree(ptr->word);
+           if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
+               res->pos.apos[0]++; 
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ */
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are made unique with uniqueWORD(); the result holds, per word,
+ * the SHORTALIGN'ed text followed by a uint16 counter and the positions
+ * (all stored with weight 0).  The word texts, the position arrays and
+ * prs->words itself are freed.
+ *
+ * Raises ERROR when the text area outgrows the offset range of
+ * WordEntry.pos.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /* the offset is stored in a 20-bit field, so MAXSTRPOS itself does not fit */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* counter first, then the positions right behind it */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text): parse the text with the configuration found
+ * by findcfg() and return the resulting tsvector.  Text that yields no
+ * words gives a tsvector with zero entries.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *src = PG_GETARG_TEXT_P(1);
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     prs;
+   tsvector   *result;
+
+   /* start with room for 32 words */
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(src), VARSIZE(src) - VARHDRSZ);
+   PG_FREE_IF_COPY(src, 1);
+
+   if (prs.curwords == 0) {
+       /* nothing found: release the word array and build an empty value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+       PG_RETURN_POINTER(result);
+   }
+
+   result = makevalue(&prs);
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg_name, text): translate the configuration name with
+ * name2id_cfg() and hand over to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * to_tsvector(text): call to_tsvector() with the configuration id
+ * returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * text.  Returns the Oid of the first such candidate reported by
+ * FuncnameGetCandidates(), or InvalidOid if there is none.  The candidate
+ * list is freed.
+ */
+static Oid
+findFunc(char *fname) {
+   FuncCandidateList cand,
+               next;
+   Oid         found = InvalidOid;
+   List       *qualname = makeList1(makeString(fname));
+
+   cand = FuncnameGetCandidates(qualname, 1);
+   freeList(qualname);
+
+   for (; cand != NULL; cand = next) {
+       next = cand->next;
+       /* only the first matching candidate is remembered */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ */
+/*
+ * Trigger function: tsearch2(tsvector_field, text_field1, ...)
+ *
+ * Must be fired per row, BEFORE INSERT or UPDATE.  The first trigger
+ * argument names the column that receives the tsvector.  Every further
+ * argument names either a column of type text, varchar or char whose value
+ * is parsed, or -- if no column has that name -- a function found with
+ * findFunc(); once such a function has been seen it is applied to the
+ * value of each column processed after it.
+ *
+ * Returns the modified tuple.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   /* note: the configuration lookup runs before the trigger-context checks below */
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * If detoasting made no copy, record the function result in
+            * txt_toasted so that the comparison below does not free it.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when it differs from the datum recorded in txt_toasted */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a tsvector with zero entries */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Descriptor of one lexeme, packed into 32 bits:
+ *   haspos - set when a position list follows the lexeme text
+ *   len    - length of the lexeme text in bytes
+ *   pos    - offset of the lexeme text inside the value's string area
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* first values that no longer fit the len / pos bit fields */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme, packed into 16 bits:
+ * a 2-bit weight and a 14-bit position.
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+/* first value that no longer fits the 14-bit position field */
+#define MAXENTRYPOS    (1<<14)
+/* cap used by the code that builds position lists */
+#define MAXNUMPOS  256
+/* clamp a position to the largest storable value, MAXENTRYPOS-1 */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * On-disk / in-memory layout of a tsvector value:
+ *   len  - total size of the value in bytes
+ *   size - number of WordEntry items
+ *   data - the WordEntry array, followed by the string area holding, per
+ *          entry, the SHORTALIGN'ed lexeme text and, if haspos is set, a
+ *          uint16 counter plus that many WordEntryPos items
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Accessors.  All macro arguments are parenthesized so that arbitrary
+ * expressions can be passed safely.
+ */
+/* size of the two int4 header fields */
+#define DATAHDRSIZE (sizeof(int4)*2)
+/* total size of a value with x entries and lenstr bytes of string area */
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+/* start of the WordEntry array */
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+/* start of the string area (depends on the size field of x) */
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* number of bytes in the string area */
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* address of the position counter that follows entry e's lexeme text */
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+/* number of positions of entry e, 0 if it has none */
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+/* first WordEntryPos of entry e (just behind the counter) */
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Entry as collected while parsing input text: the packed descriptor plus
+ * a separately allocated position list (used only when entry.haspos is
+ * set; element 0 of the list is used as a uint16 counter).
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * State of the token scanner gettoken_tsvector().
+ */
+typedef struct
+{
+   char       *prsbuf;     /* current read position in the input string */
+   char       *word;       /* buffer receiving the current token */
+   char       *curpos;     /* write position inside word */
+   int4        len;        /* allocated size of word */
+   int4        state;      /* current scanner state */
+   int4        alen;       /* allocated elements in pos; 0 = no position info */
+   WordEntryPos    *pos;   /* position list of the current token */
+   bool        oprisdelim; /* operator characters end a word; no position info parsed */
+}  TI_IN_STATE;
+
+/* returns 1 when a token was read into state->word, 0 at end of input */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>              /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * Version-1 call convention registration and prototypes for the
+ * SQL-callable functions defined in this file.
+ */
+
+/* remove all position information */
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+/* set the weight of every position */
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+/* merge two tsvectors */
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the argument that keeps every lexeme
+ * but no position information (haspos is cleared for all entries).
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* the string area only needs room for the aligned lexemes */
+   for(i=0;i<in->size;i++) 
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char"): return a copy of the tsvector in which
+ * every position carries the given weight.  The weight letter is
+ * case-insensitive: a=3, b=2, c=1, d=0; anything else raises ERROR.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* tolower() must not be handed a negative plain char */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;    
+   while(i--) {
+       /* entries without positions report a length of 0 and are skipped */
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two entries that may live in different values: ptra / ptrb are
+ * the string areas the entries' offsets refer to.  Shorter lexemes sort
+ * first; lexemes of equal length are ordered by strncmp().
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position list of
+ * destptr (an entry of dest), shifting each by maxpos and clamping it with
+ * LIMITPOS().  Weights are copied unchanged.
+ *
+ * Copying stops when the destination holds MAXNUMPOS positions or when its
+ * last position already is MAXENTRYPOS-1.  If destptr had no positions its
+ * counter is started at 0; haspos is set once something was added.
+ *
+ * Returns the number of positions added.  The caller must have reserved
+ * the space behind destptr's lexeme.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector): merge two tsvectors.
+ *
+ * Both inputs are walked in entry order (as defined by compareEntry()).
+ * Entries of the first value keep their positions; positions of the second
+ * value are shifted by the largest position found in the first one (see
+ * add_pos()).  A lexeme present in both values gets one entry holding the
+ * positions of both.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /*
+    * Allocate for the worst case (no common lexemes).  size is set to the
+    * sum of both sizes for now, which fixes where STRPTR(out) points
+    * while the result is being built; both are corrected at the end.
+    */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* copy counter and positions unchanged */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* positions of in2 are shifted by maxpos; none may survive */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one entry, positions of both */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the counter is already in place, so only the added positions take space */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* remaining entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* remaining entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Fix up the real entry count and length.  With fewer entries the
+    * string area starts earlier, so move the data down if necessary.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- Be careful!
+-- This script drops all indexes, triggers and columns that use the types
+-- defined in tsearch2.sql.
+
+
+-- GiST operator class for tsvector (CASCADE also drops dependent objects)
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregate
+DROP AGGREGATE stat(tsvector);
+
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types (CASCADE also drops everything that depends on them)
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- functions that are dropped explicitly
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of each token type, indexed by the token type
+ * number from deflex.h (1..LASTNUM).  Index 0 is an empty placeholder
+ * because token type numbers start at 1.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of each token type, indexed by the token type number from
+ * deflex.h (1..LASTNUM); entries run parallel to lex_descr[].  Index 0 is an
+ * empty placeholder because token type numbers start at 1.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Token type numbers returned by the word scanner, plus the tables that
+ * describe them.
+ */
+
+/* Remember to keep LASTNUM equal to the highest token type number below. */
+#define LASTNUM        23
+
+/* token type numbers; each one is also the index into lex_descr[]/tok_alias[] */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* per-token-type description and alias tables, defined in deflex.c */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Interface to the flex-generated word scanner (parser.l).
+ * FILE is used below, so <stdio.h> must already have been included by the
+ * file that includes this header.
+ */
+
+/*
+ * Text of the most recently scanned token.  The variable is defined (with an
+ * initializer) in parser.l, so the header must only declare it; a plain
+ * "char *token;" here would create a second definition in every other file
+ * that includes this header.
+ */
+extern char       *token;
+/*
+ * Length of the most recently scanned token.
+ * NOTE(review): this is still a tentative definition, so every file that
+ * includes parser.h gets its own copy and the link only succeeds where the
+ * toolchain merges common symbols.  It can become "extern" once a single
+ * defining "int tokenlen;" is added to parser.l.
+ */
+int            tokenlen;
+/* scan the next token and return its type number (see deflex.h) */
+int            tsearch2_yylex(void);
+/* start scanning a string / a file handle; the int argument is a length limit */
+void       start_parse_str(char *, int);
+void       start_parse_fh(FILE *, int);
+/* release the scanner state created by start_parse_* */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: the scanner's fprintf() calls are
+ * redirected to ts_error(ERROR, ...).
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* use the postgres allocation functions inside the scanner */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the current token */
+char *s     = NULL;  /* copy of a WHOLE compound token (hyphenated word or URL) */
+
+YY_BUFFER_STATE buf = NULL; /* current scanner buffer; needed for parsing from a string */
+
+int lrlimit = -1;  /* remaining bytes that may be read from the filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next read from the filehandle */
+
+/*
+ * Redefine the input macro so that reads from a filehandle honour lrlimit.
+ * In the non-interactive branch: lrlimit == 0 reports end of input;
+ * lrlimit > 0 caps the read at the remaining limit and decrements the limit;
+ * a negative lrlimit reads max_size bytes.  Despite its indentation, the
+ * fread() is not part of the inner "else": it runs for both the limited and
+ * the unlimited case.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+/* scanner options: 8-bit input, non-interactive, no unput(), no yywrap() */
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's states for parsing URL */
+%x URL  
+%x SERVER  
+
+/* parser's states for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* character classes; bytes \200-\377 are taken to be cyrillic (koi8) letters */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+/* one or more dot-terminated labels followed by an alphabetic label */
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+/* run of characters accepted after the host part of a URL */
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * end_parse - clean up after a parse: free the saved copy of a compound
+ * token (s) and delete the current scanner buffer (buf).
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * start_parse_str - start scanning the first `limit` bytes of str.
+ * A parse that is still open (buf is set) is finished first.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL )
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * start_parse_fh - start scanning from a filehandle.
+ * limit is the number of bytes that may be read; 0 means no limit (lrlimit
+ * is then set to -1).  A parse that is still open is finished first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL )
+       end_parse();
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+/* saved SPI plan for the pg_ts_parser lookup in init_prs(); prepared on first use */
+static void *plan_getparser=NULL;
+/* OID of the currently selected parser; InvalidOid until one has been chosen */
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * init_prs - load the description of parser `id` into *prs.
+ *
+ * *prs is zeroed first, then the pg_ts_parser row with oid = id is read
+ * through SPI.  The start, nexttoken, end and headline functions are looked
+ * up with fmgr_info_cxt() in TopMemoryContext; the lextype function is kept
+ * as a plain OID.  An error is reported through ts_error(ERROR, ...) when the
+ * plan cannot be prepared, the query fails, or no such parser exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   /* prepare and save the lookup plan once per backend */
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       /* columns 1..5 follow the select list above */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned: print it with %u so large OIDs do not show up negative */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/*
+ * Cache of the parsers that have been loaded so far.
+ * list holds len initialized entries (reallen allocated) and is kept sorted
+ * by prs_id so that findprs() can use bsearch(); last_prs remembers the
+ * entry returned by the most recent lookup; name2id_map caches the
+ * parser name -> OID lookups done by name2id_prs().
+ */
+typedef struct {
+   WParserInfo *last_prs;
+   int     len;
+   int     reallen;
+   WParserInfo *list;
+   SNMap       name2id_map;
+} PrsList;
+
+/* the single parser cache of this backend, initially empty */
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * reset_prs - throw away the parser cache: release the name->id map and the
+ * parser list, then return PList to its empty state.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL )
+       free(PList.list);
+
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * compareprs - qsort()/bsearch() comparator ordering WParserInfo entries by
+ * prs_id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is
+ * unsigned, so the difference of two ids converted to int has the wrong sign
+ * whenever they are more than INT_MAX apart, which would corrupt the sort
+ * order and make bsearch() miss entries.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   if ( ida > idb )
+       return 1;
+   return 0;
+}
+
+/*
+ * findprs - return the cached description of parser `id`, loading it with
+ * init_prs() on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search of the
+ * sorted cache, then a fresh load.  The returned pointer refers to an element
+ * of PList.list and may be invalidated by a later call that grows or
+ * re-sorts the list.  Errors are reported through ts_error(ERROR, ...).
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo key;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   key.prs_id=id;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /*
+    * Last chance: load the parser.  last_prs is cleared and is only set
+    * again after init_prs() has succeeded, so that an error raised inside
+    * init_prs() cannot leave it pointing at a zeroed, unregistered slot
+    * (which would later be returned for InvalidOid) or at memory moved by
+    * realloc().
+    */
+   PList.last_prs = NULL;
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+   init_prs(id, &(PList.list[PList.len]));
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+
+   /* qsort changed the order, so locate the new entry again */
+   PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return PList.last_prs;
+}
+
+/* saved SPI plan for the name lookup in name2id_prs(); prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * name2id_prs - translate a parser name into its OID.
+ *
+ * The name->id map of the parser cache is consulted first; on a miss the OID
+ * is read from pg_ts_parser through SPI and remembered in the map.  An
+ * unknown name is reported through ts_error(ERROR, ...).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid     argtypes[1];
+   Datum   params[1];
+   bool    isnull;
+   int     rc;
+   Oid     id;
+
+   argtypes[0] = TEXTOID;
+   params[0] = PointerGetDatum(name);
+
+   /* cached answer? */
+   id = findSNMap_t( &(PList.name2id_map), name );
+   if ( id != 0 )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, params, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   SPI_finish();
+
+   /* remember the answer for the next lookup */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+
+/*
+ * Per-query state of the token_type*() set-returning functions:
+ * list is the array returned by the parser's lextype function (terminated by
+ * an entry whose lexid is 0) and cur is the index of the next entry to emit.
+ */
+typedef struct {
+   int     cur;
+   LexDescr    *list;
+} TypeStorage;
+
+/*
+ * setup_firstcall - first-call initialization shared by the token_type*()
+ * functions.  Fetches the token type list of parser prsid by calling its
+ * lextype function, stores it in funcctx->user_fctx and prepares the tuple
+ * slot / input metadata for the "tokentype" row type.  Everything is
+ * allocated in the multi-call memory context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo    *parser = findprs(prsid);
+   MemoryContext   saved;
+   TypeStorage    *state;
+   TupleDesc       rowdesc;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   state->cur = 0;
+   state->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) )
+   );
+   funcctx->user_fctx = (void*)state;
+
+   rowdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+/*
+ * process_call - per-call worker shared by the token_type*() functions.
+ * Returns the next (id, alias, description) row as a Datum and frees the
+ * alias/description strings of that entry; once the terminating entry
+ * (lexid == 0) is reached the state is freed and (Datum)0 is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *st = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *item;
+   char        *values[3];
+   char         txtid[16];
+   HeapTuple    tuple;
+   Datum        result;
+
+   /* nothing (more) to return: release the state */
+   if ( !( st->list && st->list[st->cur].lexid ) ) {
+       if ( st->list )
+           pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   item = &(st->list[st->cur]);
+   sprintf(txtid, "%d", item->lexid);
+   values[0] = txtid;
+   values[1] = item->alias;
+   values[2] = item->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[1]);
+   pfree(values[2]);
+   st->cur++;
+   return result;
+}
+
+/*
+ * token_type - set-returning function listing the token types of the parser
+ * whose OID is argument 0, one "tokentype" row per call.
+ */
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   /* first call: fetch the parser's token type list */
+   if (SRF_IS_FIRSTCALL()) { 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call() means the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * token_type_byname - same as token_type(), but the parser is given by name
+ * (text argument 0) and resolved with name2id_prs().
+ */
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   /* first call: resolve the name and fetch the parser's token type list */
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call() means the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * token_type_current - same as token_type(), but for the current parser.
+ * If no parser has been selected yet, the parser named "default" becomes the
+ * current one.
+ */
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* fall back to the parser named "default" */
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call() means the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * set_curprs - make the parser whose OID is argument 0 the current parser.
+ * findprs() is called first, so an unknown OID raises an error before the
+ * current parser is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        Oid prsid = PG_GETARG_OID(0);
+
+        findprs(prsid);
+        current_parser_id = prsid;
+        PG_RETURN_VOID();
+}
+
+/*
+ * set_curprs_byname - make the parser named by text argument 0 the current
+ * parser; the name is resolved with name2id_prs() and the work is delegated
+ * to set_curprs().
+ */
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *name = PG_GETARG_TEXT_P(0);
+        Oid   prsid = name2id_prs(name);
+
+        DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+/* one parsed token: its type number and a NUL-terminated copy of its text */
+typedef struct {
+   int type;
+   char    *lexem;
+} LexemEntry;
+
+/*
+ * Per-query state of the parse*() set-returning functions: list holds the
+ * collected tokens, len is their count once collection has finished, and cur
+ * is the index of the next token to emit.
+ */
+typedef struct {
+   int cur;
+   int len;
+   LexemEntry  *list;
+} PrsStorage;
+   
+
+/*
+ * prs_setup_firstcall - first-call initialization shared by the parse*()
+ * functions.
+ *
+ * Runs parser prsid over the body of txt: calls the parser's start function,
+ * collects every token returned by its getlexeme function until that
+ * returns 0, then calls its end function.  Each token is copied
+ * (NUL-terminated) into a PrsStorage that is stored in funcctx->user_fctx,
+ * and the tuple slot / input metadata for the "tokenout" row type are
+ * prepared.  Everything is allocated in the multi-call memory context.
+ *
+ * NOTE(review): prsid is declared int although callers pass an Oid.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, int prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* start with room for 16 tokens; the array is doubled when it fills up */
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text body (without the varlena header) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* a token type of 0 ends the token stream */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* keep a private NUL-terminated copy of the token text */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the number of tokens and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * prs_process_call - per-call worker shared by the parse*() functions.
+ * Returns the next (type, lexeme) row as a Datum and frees that lexeme;
+ * once all tokens have been returned the state is freed and (Datum)0 is
+ * returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *st = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char        *values[2];
+   char         tid[16];
+   HeapTuple    tuple;
+   Datum        result;
+
+   /* all tokens delivered: release the state */
+   if ( st->cur >= st->len ) {
+       if ( st->list )
+           pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = &(st->list[st->cur]);
+   sprintf(tid, "%d", entry->type);
+   values[0] = tid;
+   values[1] = entry->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[1]);
+   st->cur++;
+   return result;
+}
+
+           
+
+/*
+ * parse - set-returning function that splits the text in argument 1 into
+ * tokens with the parser whose OID is argument 0, returning one "tokenout"
+ * row per token.
+ */
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   /* first call: parse the whole text and remember the tokens */
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from prs_process_call() means all tokens were returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * parse_byname - same as parse(), but the parser is given by name (text
+ * argument 0) and resolved with name2id_prs(); the text to parse is
+ * argument 1.
+ */
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   /* first call: resolve the name, parse the whole text, remember the tokens */
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from prs_process_call() means all tokens were returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * parse_current - same as parse(), but uses the current parser on the text
+ * in argument 0.  If no parser has been selected yet, the parser named
+ * "default" becomes the current one.
+ */
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* fall back to the parser named "default" */
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from prs_process_call() means all tokens were returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * headline - build a headline (text) for a document and a query.
+ *
+ * Arguments: 0 = OID of the configuration, 1 = document text, 2 = query,
+ * 3 = optional options text (treated as absent when not passed or when its
+ * pointer is NULL).  The document is parsed with hlparsetext(), the headline
+ * function of the configuration's parser is applied to the parsed words, and
+ * genhl() produces the returned text.
+ */
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   /* start with room for 32 words */
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   /* let the parser's headline function work on the parsed words */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /* NOTE(review): presumably startsel/stopsel are always set by the headline function above - confirm they cannot be NULL here */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+/*
+ * headline_byname - same as headline(), but the configuration is given by
+ * name (text argument 0) and resolved with name2id_cfg().  Arguments 1 and 2
+ * are passed through; argument 3 is passed through when present, otherwise a
+ * NULL pointer is passed in its place.
+ */
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg=PG_GETARG_TEXT_P(0);
+
+   Datum out=DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);   
+}
+
+/*
+ * headline_current - same as headline(), but uses the configuration returned
+ * by get_currcfg().  Arguments 0 and 1 are the document and the query;
+ * argument 2 is passed through when present, otherwise a NULL pointer is
+ * passed in its place.
+ */
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   PG_RETURN_DATUM(DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
+   ));
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/*
+ * Cached description of one parser, filled by init_prs() from pg_ts_parser:
+ * prs_id is the parser's OID, the *_info fields are the looked-up start,
+ * getlexeme, end and headline functions, lextype is the OID of the function
+ * returning the token type list, and prs holds the value returned by the
+ * parser's start function.
+ */
+typedef struct {
+   Oid prs_id;
+   FmgrInfo start_info;
+   FmgrInfo getlexeme_info;
+   FmgrInfo end_info;
+   FmgrInfo headline_info;
+   Oid lextype;
+   void *prs;
+} WParserInfo;
+
+/* load parser id into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached description of parser id, loading it on first use */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its OID */
+Oid name2id_prs(text *name);
+/* discard the parser cache */
+void   reset_prs(void);
+
+
+/*
+ * One entry of the token type list returned by a parser's lextype function:
+ * the token type number, its alias and its description.  An entry with
+ * lexid == 0 terminates the list.
+ */
+typedef struct {
+   int lexid;
+   char    *alias;
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_lextype - describe the token types of the default parser.
+ *
+ * Returns a palloc'ed array of LASTNUM+1 LexDescr entries.  Entries
+ * 0..LASTNUM-1 describe token types 1..LASTNUM (alias and description are
+ * pstrdup'ed copies of tok_alias[] / lex_descr[]); the last entry is a
+ * terminator with lexid 0.
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *descr=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   int i;
+
+   for(i=1;i<=LASTNUM;i++) {
+       descr[i-1].lexid = i;
+       descr[i-1].alias = pstrdup(tok_alias[i]);
+       descr[i-1].descr = pstrdup(lex_descr[i]);
+   }
+
+   /*
+    * Terminator entry.  palloc() does not zero memory, so clear the
+    * pointer fields too instead of leaving garbage behind the sentinel.
+    */
+   descr[LASTNUM].lexid=0;
+   descr[LASTNUM].alias=NULL;
+   descr[LASTNUM].descr=NULL;
+
+   PG_RETURN_POINTER(descr);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_start - begin parsing a buffer with the default parser.
+ *
+ * Argument 0 is a pointer to the text, argument 1 its length; both are
+ * handed to start_parse_str().  Always returns a NULL pointer.
+ */
+Datum
+prsd_start(PG_FUNCTION_ARGS) {
+   char *buf = (char*)PG_GETARG_POINTER(0);
+   int32 buflen = PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_getlexeme - fetch the next token from the default parser.
+ *
+ * Argument 0 (the parser state) is not used.  Arguments 1 and 2 are
+ * output pointers that receive the current values of "token" and
+ * "tokenlen" after the lexer has been run.  The return value is whatever
+ * tsearch2_yylex() returned (the token type).
+ */
+Datum
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tok_out;
+   int *len_out;
+   int toktype;
+
+   /* run the lexer first: token/tokenlen are read only afterwards */
+   toktype = tsearch2_yylex();
+
+   tok_out = (char**)PG_GETARG_POINTER(1);
+   len_out = (int*)PG_GETARG_POINTER(2);
+   *tok_out = token;
+   *len_out = tokenlen;
+
+   PG_RETURN_INT32(toktype);
+}
+/*
+ * prsd_end - finish a parse started with prsd_start().
+ * Calls end_parse() and returns void; the argument is not used.
+ */
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* argument 0 would be the parser state; it is unused here:
+      ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse();
+   PG_RETURN_VOID();
+}
+/*
+ * Token-class predicates over the default parser's numeric token types.
+ * Ids used below, as listed by token_type('default'): 5 url, 7 sfloat,
+ * 8 version, 12 blank, 13 tag, 14 http, 15 hword, 16 lhword, 17 nlhword,
+ * 20 float, 21 int, 22 uint, 23 entity.
+ * LEAVETOKEN/ENDPUNCTOKEN match blanks; COMPLEXTOKEN matches urls and
+ * hyphenated words.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+/*
+ * IDIGNORE: tag, http, blank, entity.
+ * HLIDIGNORE: url, tag and hyphenated words; prsd_headline() sets the
+ * "replace" flag on such words.
+ * NONWORDTOKEN: blank or HLIDIGNORE; not counted as a word when measuring
+ * headline length.
+ * NOENDTOKEN: additionally the numeric/version types and IDIGNORE; a
+ * headline should not end on such a token.
+ */
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* A window of headline words handed to TS_execute() as its check value. */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * checkcondition_HL - TS_execute() callback.
+ *
+ * checkval points to an hlCheck; returns true when any of its "len" words
+ * refers to the query item "val", false otherwise.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window = (hlCheck*)checkval;
+   int k = 0;
+
+   while ( k < window->len ) {
+       if ( window->words[k].item == val )
+           return true;
+       k++;
+   }
+
+   return false;
+}
+
+
+/*
+ * hlCover - find the next "cover" of the query in the parsed text.
+ *
+ * On entry *p is the word index at which to start searching.  For every
+ * VAL item of the query the first matching word at or after that index is
+ * located; *q becomes the largest of those indexes.  Then, scanning
+ * backwards from *q, *p becomes the smallest index of the nearest
+ * preceding matches.  If the words in [*p, *q] satisfy the query
+ * (TS_execute) the function returns true with the cover in *p / *q;
+ * otherwise the search is retried starting one word further.
+ *
+ * Returns false when no further cover exists; *p and *q are then not
+ * meaningful (*q is -1 if no query word was found at all).
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+
+   /*
+    * -1 (not 0) marks "nothing found": word 0 is a legitimate right
+    * edge of a cover and must not be mistaken for the empty result.
+    */
+   *q=-1;
+   *p=0x7fffffff;
+
+   /* right edge: furthest first-occurrence of any query operand */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q<0 )
+       return false;
+
+   /* left edge: scan back from the right edge for each operand */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* window does not satisfy the query: retry one word later */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_headline - headline function of the default parser.
+ *
+ * arg 0: HLPRSTEXT* with the parsed document words; updated in place
+ * arg 1: text* option string, or NULL for defaults.  Recognised keys
+ *        (case-insensitive): MaxWords, MinWords, ShortWord, StartSel,
+ *        StopSel
+ * arg 2: QUERYTYPE* query whose terms are highlighted
+ *
+ * Walks over all covers reported by hlCover(), stretches or shrinks each
+ * one towards the MinWords..MaxWords range, and keeps the best fragment
+ * words[bestb..beste].  If there is no cover, the fragment is the first
+ * MinWords words of the text.  Words of the chosen fragment get their
+ * in/selected/skip/replace flags set, and startsel/stopsel (plus their
+ * lengths) are filled in, defaulting to "<b>" and "</b>".
+ *
+ * Raises ERROR unless 0 < MinWords < MaxWords and ShortWord >= 0.
+ * Returns prs.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* from opt + start and end tag */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this fragment if it is the first one, if it contains more
+        * distinct query words and ends on a good token, or if it ends on
+        * a good token while the current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   /* no cover at all: fall back to the first min_words words */
+   if ( bestlen<0 ) {
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen fragment */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* documented defaults: StartSel=<b>, StopSel=</b> */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom}}}}}
+>upward₁♦_{12
+>for₁♦_{12
+>well₁♦_{12
+>over₁♦_{12
+>100₂₂♦_{12
+>feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')

+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and

+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+}

diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html

new file mode 100644 (file)

index 0000000..df0faa4


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-ref.html
@@ -0,0 +1,448 @@
+
+
+
+
+tsearch2 reference
+
+
+The tsearch2 Reference
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Reference documents the user types and functions
+of the tsearch2 module for PostgreSQL.
+An introduction to the module is provided
+by the tsearch2 Guide,
+a companion document to this one.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+Vectors and Queries
+
+Vectors and queries both store lexemes,
+but for different purposes.
+A tsvector stores the lexemes
+of the words that are parsed out of a document,
+and can also remember the position of each word.
+A tsquery specifies a boolean condition among lexemes.
+
+Any of the following functions with a configuration argument
+can use either an integer id or textual ts_name
+to select a configuration;
+if the option is omitted, then the current configuration is used.
+For more information on the current configuration,
+read the next section on Configurations.
+
+Vector Operations
+
+
+
+ to_tsvector( [configuration,]

+ document TEXT) RETURNS tsvector
+
+ Parses a document into tokens,
+ reduces the tokens to lexemes,
+ and returns a tsvector which lists the lexemes
+ together with their positions in the document.
+ For the best description of this process,
+ see the section on Parsing and Stemming
+ in the accompanying tsearch2 Guide.
+
+ strip(vector tsvector) RETURNS tsvector
+
+ Return a vector which lists the same lexemes
+ as the given vector,
+ but which lacks any information
+ about where in the document each lexeme appeared.
+ While the returned vector is thus useless for relevance ranking,
+ it will usually be much smaller.
+
+ setweight(vector tsvector, letter) RETURNS tsvector
+
+ This function returns a copy of the input vector
+ in which every location has been labelled
+ with either the letter
+ 'A', 'B', or 'C',
+ or the default label 'D'
+ (which is the default with which new vectors are created,
+ and as such is usually not displayed).
+ These labels are retained when vectors are concatenated,
+ allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+
+ vector1 || vector2
+
+ concat(vector1 tsvector, vector2 tsvector)

+ RETURNS tsvector
+
+ Returns a vector which combines the lexemes and position information
+ in the two vectors given as arguments.
+ Position weight labels (described in the previous paragraph)
+ are retained intact during the concatenation.
+ This has at least two uses.
+ First,
+ if some sections of your document
+ need to be parsed with different configurations than others,
+ you can parse them separately
+ and concatenate the resulting vectors into one.
+ Second,
+ you can weight words from some sections of your document
+ more heavily than those from others by:
+ parsing the sections into separate vectors;
+ assigning the vectors different position labels
+ with the setweight() function;
+ concatenating them into a single vector;
+ and then providing a weights argument
+ to the rank() function
+ that assigns different weights to positions with different labels.
+
+ tsvector_size(vector tsvector) RETURNS INT4
+
+ Returns the number of lexemes stored in the vector.
+
+ text::tsvector RETURNS tsvector
+
+ Directly casting text to a tsvector
+ allows you to directly inject lexemes into a vector,
+ with whatever positions and position weights you choose to specify.
+ The text should be formatted
+ like the vector would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Query Operations
+
+
+
+ to_tsquery( [configuration,]

+ querytext text) RETURNS tsquery
+
+ Parses a query,
+ which should be single words separated by the boolean operators
+ “&” and,
+ “|” or,
+ and “!” not,
+ which can be grouped using parentheses.
+ Each word is reduced to a lexeme using the current
+ or specified configuration.
+
+
+ querytree(query tsquery) RETURNS text
+
+ This might return a textual representation of the given query.
+
+ text::tsquery RETURNS tsquery
+
+ Directly casting text to a tsquery
+ allows you to directly inject lexemes into a query,
+ with whatever positions and position weight flags you choose to specify.
+ The text should be formatted
+ like the query would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Configurations
+
+A configuration specifies all of the equipment necessary
+to transform a document into a tsvector:
+the parser that breaks its text into tokens,
+and the dictionaries which then transform each token into a lexeme.
+Every call to to_tsvector() (described above)
+uses a configuration to perform its processing.
+Three configurations come with tsearch2:
+
+
+default — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the simple dictionary for all others.
+default_russian — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the ru_stem Russian Snowball dictionary for all others.
+simple — Processes both words and numbers
+ with the simple dictionary,
+ which neither discards any stop words nor alters them.
+
+
+The tsearch2 module initially chooses your current configuration
+by looking for your current locale in the locale field
+of the pg_ts_cfg table described below.
+You can manipulate the current configuration yourself with these functions:
+
+
+
+ set_curcfg( id INT | ts_name TEXT

+  ) RETURNS VOID
+
+ Set the current configuration used by to_tsvector
+ and to_tsquery.
+
+ show_curcfg() RETURNS INT4
+
+ Returns the integer id of the current configuration.
+
+
+
+Each configuration is defined by a record in the pg_ts_cfg table:
+
+create table pg_ts_cfg (
+   id      int not  null primary key,
+   ts_name     text not null,
+   prs_name    text not null,
+   locale      text
+);
+
+The id and ts_name are unique values
+which identify the configuration;
+the prs_name specifies which parser the configuration uses.
+Once this parser has split document text into tokens,
+the type of each resulting token —
+or, more specifically, the type's lex_alias
+as specified in the parser's lexem_type() table —
+is searched for together with the configuration's ts_name
+in the pg_ts_cfgmap table:
+
+create table pg_ts_cfgmap (
+   ts_name     text not null,
+   lex_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,lex_alias)
+);
+
+Those tokens whose types are not listed are discarded.
+The remaining tokens are assigned integer positions,
+starting with 1 for the first token in the document,
+and turned into lexemes with the help of the dictionaries
+whose names are given in the dict_name array for their type.
+These dictionaries are tried in order,
+stopping either with the first one to return a lexeme for the token,
+or discarding the token if no dictionary returns a lexeme for it.
+
+Parsers
+
+Each parser is defined by a record in the pg_ts_parser table:
+
+create table pg_ts_parser (
+   prs_id      int not null primary key,
+   prs_name    text not null,
+   prs_start   oid not null,
+   prs_getlexem    oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+);
+
+The prs_id and prs_name uniquely identify the parser,
+while prs_comment usually describes its name and version
+for the reference of users.
+The other items identify the low-level functions
+which make the parser operate,
+and are only of interest to someone writing a parser of their own.
+
+The tsearch2 module comes with one parser named default
+which is suitable for parsing most plain text and HTML documents.
+
+Each parser argument below
+must designate a parser with either an integer prs_id
+or a textual prs_name;
+the current parser is used when this argument is omitted.
+
+
+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID
+
+ Selects a current parser
+ which will be used when any of the following functions
+ are called without a parser as an argument.
+
+ CREATE FUNCTION lexem_type(

+  [ parser ]
+  ) RETURNS SETOF lexemtype
+
+ Returns a table which defines and describes
+ each kind of token the parser may produce as output.
+ For each token type the table gives the lexid
+ with which the parser will label each token of that type,
+ the alias which names the token type,
+ and a short description descr for the user to read.
+
+ CREATE FUNCTION parse(

+  [ parser, ] document TEXT
+  ) RETURNS SETOF lexemtype
+
+ Parses the given document and returns a series of records,
+ one for each token produced by parsing.
+ Each token includes a lexid giving its type
+ and a lexem which gives its content.
+
+
+Dictionaries
+
+Dictionaries take textual tokens as input,
+usually those produced by a parser,
+and return lexemes which are usually some reduced form of the token.
+Among the dictionaries which come installed with tsearch2 are:
+
+
+simple simply folds uppercase letters to lowercase
+ before returning the word.
+en_stem runs an English Snowball stemmer on each word
+ that attempts to reduce the various forms of a verb or noun
+ to a single recognizable form.
+ru_stem runs a Russian Snowball stemmer on each word.
+
+
+Each dictionary is defined by an entry in the pg_ts_dict table:
+
+CREATE TABLE pg_ts_dict (
+   dict_id     int not null primary key,
+   dict_name   text not null,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lemmatize  oid not null,
+   dict_comment    text
+);
+
+The dict_id and dict_name
+serve as unique identifiers for the dictionary.
+The meaning of the dict_initoption varies among dictionaries,
+but for the built-in Snowball dictionaries
+it specifies a file from which stop words should be read.
+The dict_comment is a human-readable description of the dictionary.
+The other fields are internal function identifiers
+useful only to developers trying to implement their own dictionaries.
+
+The argument named dictionary
+in each of the following functions
+should be either an integer dict_id or a textual dict_name
+identifying which dictionary should be used for the operation;
+if omitted then the current dictionary is used.
+
+
+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID
+
+ Selects a current dictionary for use by functions
+ that do not select a dictionary explicitly.
+
+ CREATE FUNCTION lexize(

+ [ dictionary, ] word text)
+ RETURNS TEXT[]
+
+ Reduces a single word to a lexeme.
+ Note that lexemes are arrays of zero or more strings,
+ since in some languages there might be several base words
+ from which an inflected form could arise.
+
+
+Ranking
+
+Ranking attempts to measure how relevant documents are to particular queries
+by inspecting the number of times each search word appears in the document,
+and whether different search terms occur near each other.
+Note that this information is only available in unstripped vectors —
+ranking functions will only return a useful result
+for a tsvector which still has position information!
+
+Both of these ranking functions
+take an integer normalization option
+that specifies whether a document's length should impact its rank.
+This is often desirable,
+since a hundred-word document with five instances of a search word
+is probably more relevant than a thousand-word document with five instances.
+The option can have the values:
+
+
+0 (the default) ignores document length.
+1 divides the rank by the logarithm of the length.
+2 divides the rank by the length itself.
+
+
+The two ranking functions currently available are:
+
+
+
+ CREATE FUNCTION rank(

+  [ weights float4[], ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS,
+ and offers the ability to weight word instances more heavily
+ depending on how you have classified them.
+ The weights specify how heavily to weight each category of word:
+ 
+>{D-weight, A-weight, B-weight, C-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(

+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “
+>Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or less.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+


diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b


--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | [email protected]
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | [email protected]
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 |  
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 |  
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 |  
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 |  
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+


diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496


--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+# Makefile template for a dictionary module generated by gendict/config.sh.
+# config.sh runs this file through sed to fill in the CFG_* placeholders and
+# writes the result as the Makefile of the new contrib directory.
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+# The whole next line is replaced by config.sh with the -I paths of the
+# tsearch2 sources (plus the snowball directory in stemmer mode); it must
+# keep starting at column 0 for the sed pattern to match.
+PG_CPPFLAGS =
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+
+include $(top_srcdir)/contrib/contrib-global.mk


diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7


--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility helps people create dictionaries for the contrib/tsearch v2
+module. In particular, it has built-in support for Snowball stemmers.
+
+Programming API to tsearch2 dictionaries is described in tsearch v2 
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument of the -p option must be *the same* as the name of
+      the stemming function in stem.c (without the _stem suffix).
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample portuguese words with the stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+   Note that the dictionary and the corresponding entry in the tsearch
+   configuration are now installed, and you may modify them using
+   plain SQL commands, for example to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercases the input word and removes it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please, check Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for additional information about "Gendict tutorial" and dictionaries.
\ No newline at end of file


diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542


--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRL/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+# Defaults for everything the command line can set.
+dictname=
+stemmode=no
+verbose=no
+cfile=
+hfile=
+dir=
+hasinit=no
+comment=
+prefix=
+
+while getopts n:c:C:h:d:p:vis opt
+do
+   case "$opt" in
+       v) verbose=yes;;
+       s) stemmode=yes;;
+       i) hasinit=yes;;
+       n) dictname="$OPTARG";;
+       c) cfile="$OPTARG";;
+       h) hfile="$OPTARG";;
+       d) dir="$OPTARG";;
+       C) comment="$OPTARG";;
+       p) prefix="$OPTARG";;
+       \?) usage;;
+   esac
+done
+
+# The dictionary name is the only mandatory argument.
+[ -z "$dictname" ] && usage
+
+# The name is folded to lower case before it is used anywhere else.
+dictname=`echo "$dictname" | tr '[:upper:]' '[:lower:]'`
+
+# Snowball mode implies an init method and fixed stem.c / stem.h sources;
+# the function prefix defaults to the dictionary name.
+if [ "$stemmode" = "yes" ] ; then
+   [ -z "$prefix" ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi
+
+[ -z "$dir" ] && dir="dict_$dictname"
+
+# The comment is later pasted into the SQL template, so it becomes either
+# the keyword null or a single-quoted literal.
+if [ -z "$comment" ]; then
+   comment=null
+else
+   comment="'$comment'"
+fi
+
+# Derive the object list: each source name gets its trailing "c" replaced
+# by "o".  $cfile is deliberately unquoted because it may list several files.
+ofile=
+for f in $cfile
+do
+   f=`echo "$f" | sed 's#c$#o#'`
+   ofile="$ofile $f"
+done
+
+if [ "$stemmode" = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+# Values are quoted so that user-supplied text (for example a comment
+# containing "*") is printed literally instead of being glob-expanded.
+if [ "$verbose" = "yes" ]; then
+   echo "Dictname: '$dictname'"
+   echo "Snowball stemmer: $stemmode"
+   echo "Has init method: $hasinit"
+   [ "$stemmode" = "yes" ] && echo "Function prefix: $prefix"
+   echo "Source files: $cfile"
+   echo "Header files: $hfile"
+   echo "Object files:$ofile"
+   echo "Comment: $comment"
+   echo "Directory: ../../$dir"
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build directory...  '
+if [ ! -d ../../$dir ]; then
+   if ! mkdir ../../$dir ; then 
+       echo "Can't create directory ../../$dir"
+       exit 1
+   fi 
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+else
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+fi
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+   [ $verbose = "yes" ] && echo -n 'Copy source and header files...  '
+   if [ ${#cfile} -ne 0 ] ; then
+       if ! cp $cfile ../../$dir ; then 
+           echo "Cant cp all or one of files: $cfile"
+           exit 1
+       fi
+   fi
+   if [ ${#hfile} -ne 0 ] ; then 
+       if ! cp $hfile ../../$dir ; then 
+               echo "Cant cp all or one of files: $hfile"
+           exit 1
+       fi
+   fi
+   [ $verbose = "yes" ] && echo ok
+fi
+
+
+# Generate subinclude.h with one #include line per header file.
+[ "$verbose" = "yes" ] && printf '%s' 'Build sub-include header...  '
+# Start from an empty file.  ": >" truncates portably, whereas "echo -n"
+# would write a literal "-n" line on shells whose echo has no -n option.
+: > "../../$dir/subinclude.h"
+for i in $hfile
+do
+   echo "#include \"$i\"" >> "../../$dir/subinclude.h"
+done
+[ "$verbose" = "yes" ] && echo ok
+
+
+if  [ $stemmode = "yes" ] ; then 
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   [ $verbose = "yes" ] && echo -n 'Build dictinonary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else 
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi 
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n "Build README.$dictname...  "
+if  [ $stemmode = "yes" ] ; then
+   echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname
+else
+   echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname
+fi
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+


diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/* Per-dictionary state allocated and filled in by dinit_CFG_MODNAME(). */
+typedef struct {
+   struct SN_env *z;                /* environment returned by CFG_PREFIX_create_env() */
+   StopList    stoplist;            /* stop words read in dinit_CFG_MODNAME() */
+   int (*stem)(struct SN_env * z);  /* stemming routine, set to CFG_PREFIX_stem */
+} DictSnowball;
+
+
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * Init method of the Snowball dictionary.
+ *
+ * Allocates a zeroed DictSnowball with malloc(), loads and sorts the stop
+ * list when argument 0 is a non-NULL text value, creates the Snowball
+ * environment and records the stemming function.  Returns the state pointer.
+ * Raises ERROR "No memory" if either allocation fails.
+ */
+Datum
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       freestoplist(&(d->stoplist));
+       /* elog(ERROR) does not return, so release the malloc'd state first */
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+


diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+HASINIT typedef struct {
+HASINIT    StopList    stoplist;
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT 
+HASINIT    if ( !d )
+HASINIT        elog(ERROR, "No memory");
+HASINIT    memset(d,0,sizeof(DictExample));
+HASINIT 
+HASINIT    d->stoplist.wordop=lowerstr;
+HASINIT    
+HASINIT    /* Your INIT code */
+HASINIT    
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+HASINIT        text       *in = PG_GETARG_TEXT_P(0);
+HASINIT        readstoplist(in, &(d->stoplist));
+HASINIT        sortstoplist(&(d->stoplist));
+HASINIT        PG_FREE_IF_COPY(in, 0);
+HASINIT    }
+HASINIT 
+HASINIT    PG_RETURN_POINTER(d);
+HASINIT }
+
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+   char    **res=palloc(sizeof(char*)*2);
+
+   /* Your INIT dictionary code */
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0]=NULL;
+HASINIT    } else 
+       res[0]=txt;
+   res[1]=NULL;
+
+   PG_RETURN_POINTER(res);
+}


diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842


--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+SET search_path = public;
+BEGIN;
+
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;


diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include <float.h>
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/*
+ * gtsvector_in is a stub: it unconditionally reports
+ * ERROR "Not implemented".
+ */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * gtsvector_out is a stub: it unconditionally reports
+ * ERROR "Not implemented".
+ */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * qsort() comparator that orders int4 values ascending.
+ */
+static int
+compareint(const void *a, const void *b)
+{
+   int4        left = *((int4 *) a);
+   int4        right = *((int4 *) b);
+
+   if (left < right)
+       return -1;
+   return (left > right) ? 1 : 0;
+}
+
+/*
+ * Sort the l-element array a in place and squeeze out duplicate values.
+ * Returns the number of distinct values now stored at the front of a.
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+   int4       *ptr,
+              *res;
+
+   /*
+    * Zero or one element: nothing to sort or squeeze.  The empty case has
+    * to return 0 here, because the loop below would not run and the
+    * final expression would then report one element that was never stored.
+    */
+   if (l <= 1)
+       return l;
+
+   ptr = res = a;
+
+   qsort((void *) a, l, sizeof(int4), compareint);
+
+   /* res trails ptr and marks the last distinct value kept so far */
+   while (ptr - a < l)
+       if (*ptr != *res)
+           *(++res) = *ptr++;
+       else
+           ptr++;
+   return res + 1 - a;
+}
+
+/*
+ * Turn an array key into a signature: clear sign, then HASH() each
+ * element of a's array into it.
+ */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+   int4        nelem = ARRNELEM(a);
+   int4       *elem = GETARR(a);
+   int4        idx = 0;
+
+   MemSet((void *) sign, 0, sizeof(BITVEC));
+   while (idx < nelem)
+   {
+       HASH(sign, elem[idx]);
+       idx++;
+   }
+}
+
+/*
+ * GiST compress support function.
+ *
+ * Argument 0 is the GISTENTRY to compress; the return value is either that
+ * same entry or a freshly palloc()ed replacement.
+ *
+ * Leaf entries hold a tsvector: each word is replaced by its crc32_sz()
+ * value, the values are sorted and de-duplicated, and the result becomes
+ * an ARRKEY key.  If that key is larger than TOAST_INDEX_TARGET it is
+ * replaced by a SIGNKEY signature.
+ *
+ * A non-leaf signature key whose bytes are all 0xff is replaced by the
+ * header-only SIGNKEY | ALLISTRUE form; every other key is returned as is.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       /* room for one int4 per word of the tsvector */
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* release the detoasted copy, if PG_DETOAST_DATUM made one */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /* any byte that is not 0xff means the key stays unchanged */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       /* every bit is set: store the header-only ALLISTRUE form */
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * GiST decompress support function.
+ *
+ * Detoasts the key of the GISTENTRY in argument 0.  When detoasting
+ * produced a new copy, a new GISTENTRY wrapping that copy is returned;
+ * otherwise the incoming entry is returned unchanged.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+
+   /* a different pointer means PG_DETOAST_DATUM returned a fresh copy */
+   if (key != (GISTTYPE *) DatumGetPointer(entry->key))
+   {
+       GISTENTRY  *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+
+       gistentryinit(*retval, PointerGetDatum(key),
+                     entry->rel, entry->page,
+                     entry->offset, key->len, FALSE);
+
+       PG_RETURN_POINTER(retval);
+   }
+
+   PG_RETURN_POINTER(entry);
+}
+
+typedef struct
+{
+   int4       *arrb;
+   int4       *arre;
+}  CHKVAL;
+
+/*
+ * Binary search for val->val in the int4 range [arrb, arre) described by
+ * checkval (a CHKVAL).  The range is expected to be in ascending order.
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+   CHKVAL     *range = (CHKVAL *) checkval;
+   int4       *lo = range->arrb;
+   int4       *hi = range->arre;
+
+   /* the candidate positions are always [lo, hi) */
+   while (lo < hi)
+   {
+       int4       *mid = lo + (hi - lo) / 2;
+
+       if (*mid == val->val)
+           return (true);
+
+       if (*mid < val->val)
+           lo = mid + 1;
+       else
+           hi = mid;
+   }
+
+   return (false);
+}
+
+/*
+ * Signature probe: checkval is treated as a bit vector and the result is
+ * the bit selected by HASHVAL(val->val).
+ */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+   return GETBIT(checkval, HASHVAL(val->val));
+}
+
+/*
+ * GiST consistent support function.
+ *
+ * Argument 0 is the GISTENTRY carrying the index key, argument 1 the query.
+ * Returns false for a query without items and true for an ALLISTRUE key.
+ * Otherwise the answer comes from TS_execute(), which probes either the
+ * key's signature bits (checkcondition_bit) or, for an array key, its
+ * int4 array (checkcondition_arr).
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(
+                               ((GISTENTRY *) PG_GETARG_POINTER(0))->key
+   );
+
+   if (!query->size)
+       PG_RETURN_BOOL(false);
+
+   if (ISSIGNKEY(key))
+   {
+       if (ISALLTRUE(key))
+           PG_RETURN_BOOL(true);
+
+       PG_RETURN_BOOL(TS_execute(
+                              GETQUERY(query),
+                              (void *) GETSIGN(key), false,
+                              checkcondition_bit
+                              ));
+   }
+   else
+   {                           /* only leaf pages */
+       CHKVAL      chkval;
+
+       /* hand the array's [begin, end) bounds to checkcondition_arr */
+       chkval.arrb = GETARR(key);
+       chkval.arre = chkval.arrb + ARRNELEM(key);
+       PG_RETURN_BOOL(TS_execute(
+                              GETQUERY(query),
+                              (void *) &chkval, true,
+                              checkcondition_arr
+                              ));
+   }
+}
+
+/*
+ * Merge key add into the signature sbase.
+ *
+ * A signature key is OR-ed in byte by byte; an array key has each of its
+ * elements HASH()ed in.  Returns 1, leaving sbase untouched, when add is an
+ * ALLISTRUE signature; returns 0 otherwise.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+   int4        i;
+
+   if (ISSIGNKEY(add))
+   {
+       BITVECP     sadd = GETSIGN(add);
+
+       if (ISALLTRUE(add))
+           return 1;
+
+       LOOPBYTE(
+                sbase[i] |= sadd[i];
+       );
+   }
+   else
+   {
+       int4       *ptr = GETARR(add);
+
+       for (i = 0; i < ARRNELEM(add); i++)
+           HASH(sbase, ptr[i]);
+   }
+   return 0;
+}
+
+
+/*
+ * GiST union support function.
+ *
+ * Argument 0 is the entry vector, argument 1 receives the byte size of the
+ * result.  All keys are merged into one signature with unionkey(); as soon
+ * as one of them is ALLISTRUE the result is the header-only
+ * SIGNKEY | ALLISTRUE key.  Returns the palloc()ed union key.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   int        *size = (int *) PG_GETARG_POINTER(1);
+   BITVEC      base;
+   int4        len = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+   int4        i;
+   int4        flag = 0;
+   GISTTYPE   *result;
+
+   MemSet((void *) base, 0, sizeof(BITVEC));
+   for (i = 0; i < len; i++)
+   {
+       if (unionkey(base, GETENTRY(entryvec, i)))
+       {
+           flag = ALLISTRUE;
+           break;
+       }
+   }
+
+   flag |= SIGNKEY;
+   len = CALCGTSIZE(flag, 0);
+   result = (GISTTYPE *) palloc(len);
+   *size = result->len = len;
+   result->flag = flag;
+   /* an ALLISTRUE key has no signature bytes to fill in */
+   if (!ISALLTRUE(result))
+       memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * GiST same support function.
+ *
+ * Compares the keys in arguments 0 and 1 and stores the verdict in the bool
+ * that argument 2 points to; that pointer is also the return value.
+ * Signature keys are equal when both are ALLISTRUE or when all signature
+ * bytes match; array keys are equal when they have the same length and
+ * the same elements in the same order.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+   GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+   GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+   bool       *result = (bool *) PG_GETARG_POINTER(2);
+
+   if (ISSIGNKEY(a))
+   {                           /* then b also ISSIGNKEY */
+       if (ISALLTRUE(a) && ISALLTRUE(b))
+           *result = true;
+       else if (ISALLTRUE(a))
+           *result = false;
+       else if (ISALLTRUE(b))
+           *result = false;
+       else
+       {
+           int4        i;
+           BITVECP     sa = GETSIGN(a),
+                       sb = GETSIGN(b);
+
+           *result = true;
+           LOOPBYTE(
+                    if (sa[i] != sb[i])
+                    {
+               *result = false;
+               break;
+           }
+           );
+       }
+   }
+   else
+   {                           /* a and b ISARRKEY */
+       int4        lena = ARRNELEM(a),
+                   lenb = ARRNELEM(b);
+
+       if (lena != lenb)
+           *result = false;
+       else
+       {
+           int4       *ptra = GETARR(a),
+                      *ptrb = GETARR(b);
+           int4        i;
+
+           *result = true;
+           for (i = 0; i < lena; i++)
+               if (ptra[i] != ptrb[i])
+               {
+                   *result = false;
+                   break;
+               }
+       }
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * Count the bits set in a signature: SUMBIT() adds up the eight bits of
+ * each byte while sign is advanced one byte per LOOPBYTE iteration.
+ */
+static int4
+sizebitvec(BITVECP sign)
+{
+   int4        size = 0,
+               i;
+
+   LOOPBYTE(
+       size += SUMBIT(*(char *) sign);
+       sign = (BITVECP) (((char *) sign) + 1);
+   );
+   return size;
+}
+
+/*
+ * Hamming distance between two signatures: the number of bit positions
+ * at which a and b differ.
+ */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+   int i,dist=0;
+
+   LOOPBIT(
+       if ( GETBIT(a,i) != GETBIT(b,i) )
+           dist++;
+   );
+   return dist;
+}
+
+/*
+ * Hamming distance between two signature keys.  An ALLISTRUE key counts as
+ * a signature with every bit set, so its distance to an ordinary key is the
+ * number of bits that key leaves clear.
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+   if ( ISALLTRUE(a) && ISALLTRUE(b) )
+       return 0;
+
+   if ( ISALLTRUE(a) )
+       return SIGLENBIT-sizebitvec(GETSIGN(b));
+
+   if ( ISALLTRUE(b) )
+       return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+   return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * GiST penalty support function.
+ *
+ * Arguments 0 and 1 are the existing and the new GISTENTRY; the float that
+ * argument 2 points to receives the penalty and is also the return value.
+ * The penalty is the Hamming distance between the two signatures, with an
+ * array key first converted by makesign().  When such a converted key is
+ * compared against an ALLISTRUE original, the count of its clear bits is
+ * scaled by 1/(SIGLENBIT+1).
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+   GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+   float      *penalty = (float *) PG_GETARG_POINTER(2);
+   GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+   GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+   BITVECP     orig = GETSIGN(origval);
+
+   *penalty = 0.0;
+
+   if (ISARRKEY(newval)) {
+       BITVEC sign;
+       makesign(sign, newval);
+
+       if ( ISALLTRUE(origval) ) 
+           *penalty=((float)(SIGLENBIT-sizebitvec(sign)))/(float)(SIGLENBIT+1);
+       else 
+           *penalty=hemdistsign(sign,orig);
+   } else {
+       *penalty=hemdist(origval,newval);
+   }
+   PG_RETURN_POINTER(penalty);
+}
+
+typedef struct
+{
+   bool        allistrue;
+   BITVEC      sign;
+}  CACHESIGN;
+
+/*
+ * Fill a CACHESIGN slot from an index key: array keys are hashed into a
+ * signature, ALLISTRUE keys only set the flag, and plain signature keys
+ * are copied.
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+   if (ISARRKEY(key))
+   {
+       item->allistrue = false;
+       makesign(item->sign, key);
+       return;
+   }
+
+   if (ISALLTRUE(key))
+   {
+       item->allistrue = true;
+       return;
+   }
+
+   item->allistrue = false;
+   memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+}
+
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+typedef struct
+{
+   OffsetNumber pos;
+   int4        cost;
+} SPLITCOST;
+
+/*
+ * qsort() comparator that orders SPLITCOST items by ascending cost.
+ */
+static int
+comparecost(const void *a, const void *b)
+{
+   const SPLITCOST *left = (const SPLITCOST *) a;
+   const SPLITCOST *right = (const SPLITCOST *) b;
+
+   if (left->cost > right->cost)
+       return 1;
+   if (left->cost == right->cost)
+       return 0;
+   return -1;
+}
+
+
+/*
+ * Hamming distance between two cached signatures.  A slot flagged
+ * allistrue counts as a signature with every bit set.
+ */
+static int
+hemdistcache(CACHESIGN   *a, CACHESIGN   *b) {
+   if ( a->allistrue && b->allistrue )
+       return 0;
+
+   if ( a->allistrue )
+       return SIGLENBIT-sizebitvec(b->sign);
+
+   if ( b->allistrue )
+       return SIGLENBIT-sizebitvec(a->sign);
+
+   return hemdistsign( a->sign, b->sign );
+}
+
+/*
+ * GiST picksplit support function: distribute the entries of an
+ * overflowing page between a left and a right page.
+ *
+ * Argument 0 is the entry vector, whose entries sit at offsets
+ * FirstOffsetNumber .. maxoff + 1; argument 1 is the GIST_SPLITVEC to fill
+ * in, which is also the return value.
+ *
+ * Steps:
+ *   1. cache a signature for every entry (fillcache() hashes array keys);
+ *   2. among offsets FirstOffsetNumber .. maxoff pick as seeds the pair
+ *      with the largest Hamming distance;
+ *   3. sort all entries by |distance to seed 1 - distance to seed 2|,
+ *      ascending;
+ *   4. give each entry to the side whose current union signature is nearer
+ *      (WISH_F() adds a term that depends on the two side counts) and OR
+ *      the entry's signature into that side's union.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+   OffsetNumber k,
+               j;
+   GISTTYPE   *datum_l,
+              *datum_r;
+   BITVECP     union_l,
+               union_r;
+   int4        size_alpha,
+               size_beta;
+   int4        size_waste,
+               waste = -1;
+   int4        nbytes;
+   OffsetNumber seed_1 = 0,
+               seed_2 = 0;
+   OffsetNumber *left,
+              *right;
+   OffsetNumber maxoff;
+   BITVECP     ptr;
+   int         i;
+   CACHESIGN  *cache;
+   SPLITCOST  *costvector;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+   cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+   fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+   /* choose the two most distant entries as seeds, filling the cache on the first pass */
+   for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+       for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+           if (k == FirstOffsetNumber)
+               fillcache(&cache[j], GETENTRY(entryvec, j));
+
+           size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+           if (size_waste > waste) {
+               waste = size_waste;
+               seed_1 = k;
+               seed_2 = j;
+           }
+       }
+   }
+
+   left = v->spl_left;
+   v->spl_nleft = 0;
+   right = v->spl_right;
+   v->spl_nright = 0;
+
+   /* no pair was examined: fall back to the first two entries */
+   if (seed_1 == 0 || seed_2 == 0) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
+   /* form initial .. */
+   if (cache[seed_1].allistrue) {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_l->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_l->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+   }
+   if (cache[seed_2].allistrue) {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_r->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_r->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+   }
+
+   union_l=GETSIGN(datum_l);
+   union_r=GETSIGN(datum_r);
+   /* the last entry was not visited by the seed search; cache it now */
+   maxoff = OffsetNumberNext(maxoff);
+   fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+   /* sort before ... */
+   costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+   for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+       costvector[j - 1].pos = j;
+       size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+       size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+       costvector[j - 1].cost = abs(size_alpha - size_beta);
+   }
+   qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+   for (k = 0; k < maxoff; k++) {
+       j = costvector[k].pos;
+       if (j == seed_1) {
+           *left++ = j;
+           v->spl_nleft++;
+           continue;
+       } else if (j == seed_2) {
+           *right++ = j;
+           v->spl_nright++;
+           continue;
+       }
+
+       /*
+        * Distance from entry j to each side's union.  cache[j].sign is a
+        * bare BITVEC with no GISTTYPE header, so it must be used directly:
+        * running it through GETSIGN() would skip GTHDRSIZE bytes and read
+        * past the end of the signature.
+        */
+       if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+               size_alpha=0;
+           else
+               size_alpha = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_l) : (BITVECP) cache[j].sign
+               );
+       } else {
+           size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+       }
+
+       if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+               size_beta=0;
+           else
+               size_beta = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_r) : (BITVECP) cache[j].sign
+               );
+       } else {
+           size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+       }
+
+       if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+           if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+               /* the union saturates; set every bit unless the key has no signature bytes */
+               if (! ISALLTRUE(datum_l) )
+                   MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_l[i] |= ptr[i];
+               );
+           }
+           *left++ = j;
+           v->spl_nleft++;
+       } else {
+           if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_r) )
+                   MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_r[i] |= ptr[i];
+               );
+           }
+           *right++ = j;
+           v->spl_nright++;
+       }
+   }
+
+   /* one extra slot after the last entry of each list is set to FirstOffsetNumber */
+   *right = *left = FirstOffsetNumber;
+   pfree(costvector);
+   pfree(cache);
+   v->spl_ldatum = PointerGetDatum(datum_l);
+   v->spl_rdatum = PointerGetDatum(datum_r);
+
+   PG_RETURN_POINTER(v);
+}


diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+#define BITBYTE 8
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+#define LOOPBYTE(a) \
+       for(i=0;i
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i
+                               a;\
+               }
+
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+#define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ */
+typedef struct
+{
+   int4        len;
+   int4        flag;
+   char        data[1];
+}  GISTTYPE;
+
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+/*
+ * Key accessors.  x is any pointer to a GISTTYPE; it is parenthesised in
+ * every expansion so that expression arguments (for instance p + 1) are
+ * cast as a whole.
+ */
+#define ISARRKEY(x) ( ((GISTTYPE*)(x))->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)(x))->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)(x))->flag & ALLISTRUE )
+
+/* size of the len + flag header that precedes the key payload */
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+/* total key size: header plus len int4s (ARRKEY), nothing (ALLISTRUE) or a signature */
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+/* payload viewed as a signature or as an int4 array, and the array's length */
+#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)(x)+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)(x))->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "spell.h"
+
+#define MAXNORMLEN 56
+
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+static int cmpspell(const void *s1,const void *s2){
+   return(strcmp(((const SPELL*)s1)->word,((const SPELL*)s2)->word));
+}
+
+/*
+ * Lower-case a NUL-terminated string in place, one byte at a time,
+ * with tolower().
+ */
+static void 
+strlower( char * str ) {
+   unsigned char *cur;
+
+   for ( cur = (unsigned char *)str; *cur; cur++ )
+       *cur = tolower( *cur );
+}
+
+/*
+ * Backward string comparison for suffix tree operations: compare s1 and s2
+ * from their last characters towards the front.  Returns -1, 0 or 1; when
+ * one string is a proper tail of the other, the shorter one sorts first.
+ */
+static int 
+strbcmp(const char *s1, const char *s2) { 
+   int i1, i2;
+
+   for (i1 = strlen(s1)-1, i2 = strlen(s2)-1; i1 >= 0 && i2 >= 0; i1--, i2--) {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+   if (i1 == i2)
+       return 0;
+
+   return (i1 < i2) ? -1 : 1;
+}
+static int 
+strbncmp(const char *s1, const char *s2, size_t count) { 
+   int l1 = strlen(s1) - 1, l2 = strlen(s2) - 1, l = count;
+   while (l1 >= 0 && l2 >= 0 && l > 0) {
+       if (s1[l1] < s2[l2]) return -1;
+       if (s1[l1] > s2[l2]) return 1;
+       l1--;
+       l2--;
+       l--;
+   }
+   if (l == 0) return 0;
+   if (l1 < l2) return -1;
+   if (l1 > l2) return 1;
+   return 0;
+}
+
+/*
+ * qsort() comparator for AFFIX entries: order by type first; within one
+ * type, entries of type 'p' compare their repl strings front to back
+ * (strcmp) and all others back to front (strbcmp).
+ */
+static int 
+cmpaffix(const void *s1,const void *s2){
+   const AFFIX *left = (const AFFIX*)s1;
+   const AFFIX *right = (const AFFIX*)s2;
+
+   if (left->type != right->type)
+       return (left->type < right->type) ? -1 : 1;
+
+   if (left->type == 'p')
+       return(strcmp(left->repl,right->repl));
+
+   return(strbcmp(left->repl,right->repl));
+}
+
+/*
+ * Append one dictionary entry (a word and its affix flag letters) to
+ * Conf->Spell, growing the array by 1024*20 entries whenever it is full.
+ * The word is copied with strdup(); at most 9 flag characters are kept.
+ * Always returns 0; running out of memory is reported with elog(ERROR).
+ */
+int 
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell"); 
+   }
+   Conf->Spell[Conf->nspell].word=strdup(word);
+   if ( !Conf->Spell[Conf->nspell].word ) 
+       elog(ERROR,"No memory for AddSpell");
+   strncpy(Conf->Spell[Conf->nspell].flag,flag,10);
+   /*
+    * strncpy() leaves the field unterminated when flag has 10 or more
+    * characters, yet FindWord() scans it with strchr(); force a
+    * terminator in the last byte strncpy() may write.
+    * NOTE(review): assumes SPELL.flag holds exactly those 10 bytes;
+    * confirm against spell.h.
+    */
+   Conf->Spell[Conf->nspell].flag[9]='\0';
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * Load an ispell word list from filename into Conf via AddSpell().
+ *
+ * Each line has the form "word" or "word/FLAGS".  The flag string is cut at
+ * the first character that is not an ASCII letter, the word is lower-cased
+ * and stripped of its line ending.  Returns 1 if the file cannot be
+ * opened, 0 otherwise.
+ */
+int 
+ImportDictionary(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];  
+   FILE *dict;
+
+   if(!(dict=fopen(filename,"r")))return(1);
+   while(fgets(str,sizeof(str),dict)){
+       unsigned char *s;
+       const unsigned char *flag;
+
+           flag = NULL;
+       if((s=strchr(str,'/'))){
+           /* split at '/': str keeps the word, flag points at the letters after it */
+           *s=0;
+           s++;flag=s;
+           while(*s){
+               if (((*s>='A')&&(*s<='Z'))||((*s>='a')&&(*s<='z')))
+                   s++;
+               else {
+                   *s=0;
+                   break;
+               }
+           }
+       }else{
+           flag="";
+       }
+       /* only the word is lower-cased; the flags sit behind the terminator written above */
+       strlower(str);
+       /* Dont load words if first letter is not required */
+       /* It allows to optimize loading at  search time   */
+       s=str;
+       while(*s){
+           if(*s=='\r')*s=0;
+           if(*s=='\n')*s=0;
+           s++;
+       }
+       AddSpell(Conf,str,flag);
+   }
+   fclose(dict);
+   return(0);
+}
+
+
+/*
+ * Look word up in Conf->Spell.
+ *
+ * The search is limited to the index range that SpellTree records for the
+ * word's first byte.  An entry matches when its word equals word and, if
+ * affixflag is non-zero, its flag string contains affixflag.  Returns the
+ * matching entry or NULL.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int l,c,r,resc,resl,resr, i;
+
+   i = (int)(*word) & 255;
+   l = Conf->SpellTree.Left[i];
+   r = Conf->SpellTree.Right[i];
+   if (l == -1) return (NULL);
+   while(l<=r){
+       /* probe the middle and both ends of the current range */
+       c = (l + r) >> 1;
+       resc = strcmp(Conf->Spell[c].word, word);
+       if( (resc == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[c]);
+       }
+       resl = strcmp(Conf->Spell[l].word, word);
+       if( (resl == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[l]);
+       }
+       resr = strcmp(Conf->Spell[r].word, word);
+       if( (resr == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[r]);
+       }
+       /*
+        * Halve the range on the side indicated by the middle probe and
+        * drop the end that was just examined.  When the middle word is
+        * equal but lacks the flag, shrink from both ends instead.
+        */
+       if(resc < 0){
+           l = c + 1;
+           r--;
+       } else if(resc > 0){
+           r = c - 1;
+           l++;
+       } else {
+           l++;
+           r--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * Append one affix rule to Conf->Affix, growing the array 16 entries at a
+ * time.
+ *
+ * flag       - affix flag letter the rule belongs to
+ * mask       - regular expression; stored with a trailing '$' when type is
+ *              's' and with a leading '^' otherwise
+ * find, repl - the rule's two strings, stored verbatim (replen caches
+ *              strlen(repl))
+ * type       - 's' for a suffix rule; ImportAffixes() passes 'p' otherwise
+ *
+ * Always returns 0.  Running out of memory, or a string that does not fit
+ * its fixed-size AFFIX field, is reported with elog(ERROR).
+ */
+int 
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   AFFIX *entry;
+
+   if(Conf->naffixes>=Conf->maffixes){
+       if(Conf->maffixes){
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }else{
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL ) 
+           elog(ERROR,"No memory for AddAffix");
+   }
+   entry = &(Conf->Affix[Conf->naffixes]);
+
+   /*
+    * Refuse strings that would overflow the destination fields; mask needs
+    * room for one anchor character plus the terminator.
+    * NOTE(review): sizeof() assumes mask/find/repl are char arrays inside
+    * AFFIX, as the in-place sprintf()/strcpy() imply; confirm in spell.h.
+    */
+   if ( strlen(mask)+2 > sizeof(entry->mask) ||
+        strlen(find)+1 > sizeof(entry->find) ||
+        strlen(repl)+1 > sizeof(entry->repl) )
+       elog(ERROR,"Affix rule is too long");
+
+   if (type=='s') {
+       sprintf(entry->mask,"%s$",mask);
+   } else {
+       sprintf(entry->mask,"^%s",mask);
+   }
+   /* the regular expression is compiled lazily, on first use */
+   entry->compile = 1;
+   entry->flag=flag;
+   entry->type=type;
+   
+   strcpy(entry->find,find);
+   strcpy(entry->repl,repl);
+   entry->replen=strlen(repl);
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy src into dist while dropping every space, '-' and tab character.
+ * dist is NUL-terminated and returned.
+ */
+static char * 
+remove_spaces(char *dist,char *src){
+   char *out = dist;
+   char *in;
+
+   for (in = src; *in; in++) {
+       if (*in == ' ' || *in == '-' || *in == '\t')
+           continue;
+       *out++ = *in;
+   }
+   *out=0;
+   return(dist);
+}
+
+
+/*
+ * Load an ispell affix file from filename into Conf via AddAffix().
+ *
+ * Lines starting with "suffixes" or "prefixes" select the current section,
+ * and a "flag X" line sets the flag letter for the rules that follow.
+ * Inside a section each rule line has the form "mask > find , repl" or
+ * "mask > repl"; text after '#' is ignored and spaces, '-' and tabs are
+ * removed from the three parts.  Returns 1 if the file cannot be opened,
+ * 0 otherwise.
+ */
+int 
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           /*
+            * Skip '*' and blanks in front of the flag letter.  strchr()
+            * also matches the terminating NUL of its first argument, so
+            * *s has to be tested first or the scan runs off the buffer.
+            */
+           while(*s && strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* str is free again after sscanf(), so it serves as scratch space */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               /* "mask > repl": the single string is the replacement */
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+       
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+       
+   }
+   fclose(affix);
+       
+   return(0);
+}
+
+void 
+SortDictionary(IspellDict * Conf){
+  int  CurLet = -1, Let;size_t i;
+
+        qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+   for(i = 0; i < 256 ; i++ )
+       Conf->SpellTree.Left[i] = -1;
+
+   for(i = 0; i < Conf->nspell; i++) {
+     Let = (int)(*(Conf->Spell[i].word)) & 255;
+     if (CurLet != Let) {
+       Conf->SpellTree.Left[Let] = i;
+       CurLet = Let;
+     }
+     Conf->SpellTree.Right[Let] = i;
+   }
+}
+
+/*
+ * Sort the affix rules with cmpaffix and rebuild the prefix/suffix indexes.
+ *
+ * Prefix rules (type 'p') are indexed by the first byte of their repl
+ * string, all other rules by the last byte of repl (0 when repl is empty).
+ * Left[c]/Right[c] of the matching tree receive the first and last index in
+ * Conf->Affix seen for byte c; both are -1 when no rule uses that byte.
+ */
+void
+SortAffixes(IspellDict * Conf)
+{
+   AFFIX      *entry;
+   int         lastPrefix = -1;
+   int         lastSuffix = -1;
+   int         key;
+   size_t      n;
+
+   if (Conf->naffixes > 1)
+       qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
+
+   for (n = 0; n < 256; n++)
+   {
+       Conf->PrefixTree.Right[n] = -1;
+       Conf->PrefixTree.Left[n] = -1;
+       Conf->SuffixTree.Right[n] = -1;
+       Conf->SuffixTree.Left[n] = -1;
+   }
+
+   for (n = 0; n < Conf->naffixes; n++)
+   {
+       entry = &(((AFFIX *) Conf->Affix)[n]);
+       if (entry->type == 'p')
+       {
+           key = (int) (*(entry->repl)) & 255;
+           if (lastPrefix != key)
+           {
+               Conf->PrefixTree.Left[key] = n;
+               lastPrefix = key;
+           }
+           Conf->PrefixTree.Right[key] = n;
+       }
+       else
+       {
+           key = (entry->replen) ? (int) (entry->repl[entry->replen - 1]) & 255 : 0;
+           if (lastSuffix != key)
+           {
+               Conf->SuffixTree.Left[key] = n;
+               lastSuffix = key;
+           }
+           Conf->SuffixTree.Right[key] = n;
+       }
+   }
+}
+
+/*
+ * Try to undo one suffix rule on 'word'.
+ *
+ * *res receives the result of comparing the word against the rule's repl
+ * string (strbncmp); any non-zero value means the rule does not apply.
+ * Otherwise the last replen bytes of the word are replaced by the rule's
+ * find string, the candidate is matched against the rule's mask (compiled
+ * on first use) and looked up with FindWord.  Returns a pstrdup'd copy of
+ * the candidate on success, NULL in every other case.
+ */
+static char *
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf)
+{
+   regmatch_t  match[2];       /* workaround for apache&linux */
+   char        candidate[2*MAXNORMLEN] = "";
+   int         rc;
+
+   *res = strbncmp(word, Affix->repl, Affix->replen);
+   if (*res != 0)
+       return NULL;
+
+   /* swap the matched ending for the rule's "find" string */
+   strcpy(candidate, word);
+   strcpy(candidate+len-Affix->replen, Affix->find);
+
+   if (Affix->compile)
+   {
+       rc = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+       if (rc)
+       {
+           regfree(&(Affix->reg));
+           return NULL;
+       }
+       Affix->compile = 0;
+   }
+
+   rc = regexec(&(Affix->reg),candidate,1,match,0);
+   if (rc == 0 && FindWord(Conf, candidate, Affix->flag))
+       return pstrdup(candidate);
+
+   return NULL;
+}
+
+#define NS 1
+#define MAX_NORM 512
+/*
+ * Try to undo one prefix rule on 'word' and append any resulting normal
+ * forms to the caller's result array.
+ *
+ * word, len  - the word being normalized (len is not used in this body)
+ * Affix      - the prefix rule to try
+ * Conf       - dictionary holding the word list and the affix array
+ * pi         - index into Conf->SuffixTree.Left used to pick one suffix rule
+ *              to try on top of the de-prefixed word
+ * forms, cur - start of the result array and pointer to the caller's write
+ *              cursor; the cursor is advanced and the array kept
+ *              NULL-terminated, never beyond MAX_NORM-1 entries
+ *
+ * Returns the strncmp() result when the word does not start with the rule's
+ * repl string, and 0 otherwise (including when the mask fails to compile).
+ */
+static int 
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur ) {
+  regmatch_t subs[NS*2];
+  char newword[2*MAXNORMLEN] = "";
+  int err, ls, res, lres;
+  size_t newlen;
+  AFFIX *CAffix = Conf->Affix;
+  
+  res = strncmp(word, Affix->repl, Affix->replen);
+  if (res != 0) {
+    return res;
+  }
+  /* candidate = rule's "find" string + word with the matched prefix removed */
+  strcpy(newword, Affix->find);
+  strcat(newword, word+Affix->replen);
+
+  /* the mask is compiled on first use; compile is cleared once that succeeds */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return (0);
+    }
+    Affix->compile = 0;
+  }
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    SPELL * curspell;
+
+    /* the de-prefixed word itself is a dictionary word carrying this flag */
+    if((curspell=FindWord(Conf, newword, Affix->flag))){
+      if ((*cur - forms) < (MAX_NORM-1)) {
+   **cur =  pstrdup(newword);
+   (*cur)++; **cur = NULL;
+      }
+    } 
+    /* additionally try the first suffix rule indexed under pi */
+    newlen = strlen(newword);
+    ls = Conf->SuffixTree.Left[pi];
+      if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
+   **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
+   if (**cur) {
+     (*cur)++; **cur = NULL;
+   }
+      }
+  }
+  return 0;
+}
+
+
+/*
+ * Return a palloc'd, NULL-terminated array of the normal (dictionary) forms
+ * of 'word', or NULL when none is found.
+ *
+ * The word is lower-cased in place.  Forms are collected from the word
+ * itself (FindWord), from prefix rules (CheckPrefix) and from suffix rules
+ * (CheckSuffix); at most MAX_NORM-1 forms are stored.  Empty words and words
+ * longer than MAXNORMLEN yield NULL.
+ */
+char ** 
+NormalizeWord(IspellDict * Conf,char *word){
+   size_t len;
+   char **forms;
+   char **cur;
+   AFFIX *Affix;
+   int ri, pi, ipi, lp, rp, cp, ls, rs;
+   int lres, rres, cres = 0;
+
+   len=strlen(word);
+   /*
+    * An empty word has no last character: indexing word[len-1] would read
+    * before the buffer, and pi == 0 would make the "ipi += pi" loop below
+    * spin forever.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return(NULL);
+
+   strlower(word);
+
+   forms=(char **) palloc(MAX_NORM*sizeof(char *));
+   cur=forms;*cur=NULL;
+
+   ri = (int)(*word) & 255;            /* first byte: prefix index key */
+   pi = (int)(word[len-1]) & 255;      /* last byte: suffix index key */
+   Affix=(AFFIX*)Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if(FindWord(Conf, word, 0)){
+       *cur=pstrdup(word);
+       cur++;*cur=NULL;
+   }
+
+   /*
+    * Find all other NORMAL forms of the 'word'.  Two passes: suffix bucket 0
+    * (rules with an empty repl string) and the bucket of the word's last
+    * byte.  pi is non-zero here, so the loop runs exactly twice.
+    */
+   for (ipi = 0; ipi <= pi; ipi += pi) {
+
+       /* check prefix */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp) {
+           cp = (lp + rp) >> 1;
+           cres = 0;
+           if ((cur - forms) < (MAX_NORM-1)) {
+               cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+           }
+           if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+           }
+           if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+           }
+           if (cres < 0) {
+               rp = cp - 1;
+               lp++;
+           } else if (cres > 0) {
+               lp = cp + 1;
+               rp--;
+           } else {
+               lp++;
+               rp--;
+           }
+       }
+
+       /* check suffix: walk the bucket from both ends towards the middle */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       while (ls >= 0 && ls <= rs) {
+           if (  ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           ls++;
+           rs--;
+       } /* end while */
+
+   } /* for ipi */
+
+   if(cur==forms){
+       pfree(forms);
+       return(NULL);
+   }
+   return(forms);
+}
+
+/*
+ * Release everything owned by an IspellDict and reset it to all zeroes.
+ *
+ * regfree() is called only for affixes whose compile field is 0, i.e. whose
+ * mask has been compiled by CheckSuffix/CheckPrefix.  The word strings, the
+ * Spell array and the Affix array are released with free().
+ */
+void 
+FreeIspell (IspellDict *Conf) {
+  int i;
+  AFFIX *Affix = (AFFIX *)Conf->Affix;
+
+  for (i = 0; i < Conf->naffixes; i++) {
+    if (Affix[i].compile == 0) {
+      regfree(&(Affix[i].reg));
+    }
+  }
+  /* one word per dictionary entry: the bound is nspell, not naffixes */
+  for (i = 0; i < Conf->nspell; i++) {
+   free( Conf->Spell[i].word );
+  }
+  free(Conf->Affix);
+  free(Conf->Spell);
+  memset( (void*)Conf, 0, sizeof(IspellDict) );
+  return;
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include 
+#include 
+
+/* One dictionary entry. */
+typedef struct spell_struct {
+        /* the word; released with free() in FreeIspell */
+        char * word; 
+        /* presumably the affix flags allowed for the word (see AddSpell) - TODO confirm */
+        char flag[10];
+} SPELL;
+
+/* One affix rule, as added by AddAffix. */
+typedef struct aff_struct {   
+        /* flag character of the rule; passed to FindWord when testing a candidate */
+        char flag;
+        /* 'p' for a prefix rule; ImportAffixes passes 's' for suffix rules */
+        char type;
+        /* regular expression source, compiled into reg on first use */
+        char mask[33];
+        /* string put back into the word when the rule is undone */
+        char find[16];
+        /* string that must be present at the start/end of the inflected word */
+        char repl[16];
+        regex_t reg;
+        /* presumably strlen(repl); used as the comparison length - TODO confirm */
+        size_t replen;
+        /* non-zero while mask still has to be compiled; set to 0 after regcomp succeeds */
+        char compile;
+} AFFIX;
+
+/*
+ * Per-byte index into a sorted array: Left[c]/Right[c] are the first and
+ * last array index recorded for byte value c (-1 marks "none" where the
+ * sort routines initialize it).
+ */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+/* An ispell dictionary: affix rules, word list and their lookup indexes. */
+typedef struct {
+   /* presumably the allocated capacity of Affix - TODO confirm in AddAffix */
+   int maffixes;
+   /* number of entries used in Affix */
+   int naffixes;
+   AFFIX * Affix;
+
+   /* number of entries used in Spell */
+   int nspell;
+   /* presumably the allocated capacity of Spell - TODO confirm in AddSpell */
+   int mspell;
+   SPELL   *Spell;
+   /* filled by SortDictionary: Spell indexed by first byte of the word */
+   Tree_struct SpellTree;
+   /* filled by SortAffixes: prefix rules indexed by first byte of repl */
+   Tree_struct PrefixTree;
+   /* filled by SortAffixes: other rules indexed by last byte of repl */
+   Tree_struct SuffixTree;
+
+} IspellDict;
+
+char ** NormalizeWord(IspellDict * Conf,char *word);
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#define CS_WAITKEY 0
+#define CS_INKEY   1
+#define CS_WAITEQ  2
+#define CS_WAITVALUE   3
+#define CS_INVALUE 4
+#define CS_IN2VALUE    5
+#define CS_WAITDELIM   6
+#define CS_INESC   7
+#define CS_IN2ESC  8
+
+/*
+ * Copy 'len' bytes starting at 'ptr' into a freshly palloc'd, NUL-terminated
+ * string and remove backslash escapes from the copy: each backslash is
+ * dropped and the character following it is kept literally.  A backslash
+ * that is the last character escapes nothing and is simply dropped.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+   char *res=palloc(len+1), *cptr;
+   memcpy(res,ptr,len);
+   res[len]='\0';
+   cptr = ptr = res;
+   while(*ptr) {
+       if ( *ptr == '\\' ) {
+           ptr++;
+           /* trailing backslash: stop here instead of stepping past the NUL */
+           if ( !*ptr )
+               break;
+       }
+       *cptr=*ptr; ptr++; cptr++;
+   }
+   *cptr='\0';
+
+   return res;
+}
+
+/*
+ * Parse a dictionary option string of the form
+ *     key = value, key = "quoted value", ...
+ * into a palloc'd array of Map entries returned through *m.
+ *
+ * Keys consist of alphabetic characters.  A value is either quoted with
+ * double quotes or runs up to the next whitespace or comma; a backslash
+ * escapes the following character in both forms.  The array is sized from
+ * the number of commas in the input plus two and zero-filled, so the entries
+ * after the last parsed pair stay all-zero.
+ *
+ * Raises elog(ERROR) on a syntax error or when the input ends inside a key,
+ * a quoted value or an escape.
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+   Map *mptr;
+   char *ptr=VARDATA(in), *begin=NULL;
+   int num=0;      /* int, not char: long inputs may hold more than 127 commas */
+   int state=CS_WAITKEY;
+
+   /* first pass: count commas to get an upper bound on the number of pairs */
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if ( *ptr==',' ) num++;
+       ptr++;
+   }
+
+   *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+   memset(mptr, 0, sizeof(Map)*(num+2) );
+   ptr=VARDATA(in);
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if (state==CS_WAITKEY) {
+           if (isalpha((unsigned char) *ptr)) {
+               begin=ptr;
+               state=CS_INKEY;
+           } else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if (state==CS_INKEY) {
+           if ( isspace((unsigned char) *ptr) ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITEQ;
+           } else if ( *ptr=='=' ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITVALUE;
+           } else if ( !isalpha((unsigned char) *ptr) ) 
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITEQ ) {
+           if ( *ptr=='=' )
+               state=CS_WAITVALUE;
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITVALUE ) {
+           if ( *ptr=='"' ) {
+               begin=ptr+1;
+               state=CS_INVALUE;
+           } else if ( !isspace((unsigned char) *ptr) ) {
+               begin=ptr;
+               state=CS_IN2VALUE;
+           }
+       } else if ( state==CS_INVALUE ) {
+           if ( *ptr=='"' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_INESC;
+       } else if ( state==CS_IN2VALUE ) {
+           if ( isspace((unsigned char) *ptr) || *ptr==',' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               /* escape inside an unquoted value must return to CS_IN2VALUE */
+               state=CS_IN2ESC;
+       } else if ( state==CS_WAITDELIM ) {
+           if ( *ptr==',' ) 
+               state=CS_WAITKEY; 
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state == CS_INESC ) {
+           /* escaped character consumed: back to the quoted value */
+           state=CS_INVALUE;
+       } else if ( state == CS_IN2ESC ) {
+           /* escaped character consumed: back to the unquoted value */
+           state=CS_IN2VALUE;
+       } else 
+           elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int) (ptr-VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may legitimately run to the end of the input */
+   if (state==CS_IN2VALUE) {
+       mptr->value = nstrdup(begin, ptr-begin);
+       mptr++;
+   } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) ) 
+       elog(ERROR,"Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery input is parsed with the text parser
+ * and morphology is applied as well.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored in a flat (plain) view: in the array of nodes
+ * the right child is always the next item and the left child is at
+ * position item + item->left.
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR   2
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ *
+ * Nodes form a singly linked list through 'next'; pushquery() links each
+ * new node in front of the previous ones.
+ */
+typedef struct NODE
+{
+   /* weight bit mask of the operand (see get_weight) */
+   int2        weight;
+   /* token type as passed to pushquery (VAL, OPR, ...) */
+   int2        type;
+   /* operator character for OPR nodes, crc32 of the operand for values */
+   int4        val;
+   /* offset of the operand text in the operand buffer */
+   int2        distance;
+   /* length of the operand text */
+   int2        length;
+   struct NODE *next;
+}  NODE;
+
+/* Parser state shared by the query tokenizer and the polish-notation builder. */
+typedef struct
+{
+   /* current position in the query string */
+   char       *buf;
+   /* WAITOPERAND or WAITOPERATOR */
+   int4        state;
+   /* number of currently open parentheses */
+   int4        count;
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   /* allocated size of op */
+   int4        lenop;
+   /* total bytes of operand text stored so far, terminators included */
+   int4        sumlen;
+   /* start of the operand buffer */
+   char       *op;
+   /* write position inside op */
+   char       *curop;
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional ":ABCD" weight suffix at 'buf'.
+ *
+ * *weight is set to a bit mask (A/a = 1<<3, B/b = 1<<2, C/c = 1<<1,
+ * D/d = 1) or to 0 when 'buf' does not start with ':'.  Returns a pointer
+ * to the first character that is not part of the suffix.
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+   *weight = 0;
+
+   if ( *buf != ':' )
+       return buf;
+
+   buf++;
+   while( *buf ) {
+       /* ctype functions need an unsigned char value; plain char may be negative */
+       switch(tolower((unsigned char) *buf)) {
+           case 'a': *weight |= 1<<3; break; 
+           case 'b': *weight |= 1<<2; break; 
+           case 'c': *weight |= 1<<1; break; 
+           case 'd': *weight |= 1;    break;
+           default: return buf; 
+       }
+       buf++;
+   }
+   
+   return buf;
+}
+
+/*
+ * get token from query string
+ *
+ * Returns one of OPR, OPEN, CLOSE, VAL, END or ERR and advances state->buf.
+ * For OPR, *val holds the operator character.  For VAL, *strval/*lenval
+ * describe the operand text produced by gettoken_tsvector() and *weight the
+ * optional weight suffix.  Blanks between tokens are skipped.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               /* '!' and '(' may precede an operand; the state does not change */
+               if (*(state->buf) == '!')
+               {
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* hand the operand over to the tsvector value parser */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       /* continue after the value and its optional weight suffix */
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   /* more ')' than '(' is an error */
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* end of input is only valid when all parentheses are closed */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       /* only reached for a blank: skip it */
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Prepend a new node to the parser's polish-notation list.
+ *
+ * 'distance' must be below MAXSTRPOS and 'lenval' below MAXSTRLEN,
+ * otherwise an error is raised.  state->num is incremented for every node
+ * added.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node;
+
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+
+   node = (NODE *) palloc(sizeof(NODE));
+   node->type = type;
+   node->val = val;
+   node->weight = weight;
+   node->distance = distance;
+   node->length = lenval;
+
+   /* link in front of the nodes pushed so far */
+   node->next = state->str;
+   state->str = node;
+   state->num++;
+}
+
+/*
+ * This function is used for tsquery parsing
+ *
+ * Pushes a value node for the operand strval/lenval (val = crc32 of the
+ * text, distance = its offset in the operand buffer) and appends the
+ * NUL-terminated operand text to state->op, doubling the buffer as needed.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             state->curop - state->op, lenval, weight);
+
+   /* grow the operand buffer until the text plus its terminator fits */
+   while (state->curop - state->op + lenval + 1 >= state->lenop)
+   {
+       int4        tmp = state->curop - state->op;
+
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+       /* repalloc may move the buffer: re-derive the write position */
+       state->curop = state->op + tmp;
+   }
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop += lenval;
+   *(state->curop) = '\0';
+   state->curop++;
+   state->sumlen += lenval + 1;
+   return;
+}
+
+/*
+ * This function is used for morph parsing
+ *
+ * The operand text is run through parsetext_v2() with the configuration
+ * state->cfg_id.  Every resulting word is pushed as a VAL operand and the
+ * words are joined with '&'.  When parsing yields no word at all, a VALTRUE
+ * placeholder is pushed instead.
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT         prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       /* every word after the first is AND-ed with what came before */
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }   
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 ) 
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens with gettoken_query() and emits nodes through pushval (for
+ * operands) and pushquery (for operators).  Operators that cannot be
+ * emitted yet wait on a local stack of STACKDEPTH entries.  A '(' is
+ * handled by a recursive call that returns at the matching ')'.
+ * Returns END; a syntax error is reported through elog(ERROR).
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* the operand completes every pending '&' and '!' on top of the stack */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               /* a '|' arriving while operators are stacked is emitted at once */
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* parse the parenthesized sub-expression recursively */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               /* note: only one pending '&'/'!' is popped here (if, not while) */
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* end of this parenthesized level: flush what is still stacked */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush the remaining operators */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/* Context handed to checkcondition_str() while a query is executed (see exectsq). */
+typedef struct
+{
+   /* first entry of the tsvector's WordEntry array */
+   WordEntry  *arrb;
+   /* one past its last entry */
+   WordEntry  *arre;
+   /* string area of the tsvector; WordEntry.pos is an offset into it */
+   char       *values;
+   /* operand text of the query; ITEM.distance is an offset into it */
+   char       *operand;
+}  CHKVAL;
+
+/*
+ * Order a tsvector entry against a query operand: shorter strings sort
+ * first, strings of equal length are compared with strncmp().
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   char       *entry;
+   char       *operand;
+
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   entry = &(chkval->values[ptr->pos]);
+   operand = &(chkval->operand[item->distance]);
+   return strncmp(entry, operand, item->length);
+}
+
+/*
+ * check weight info
+ *
+ * The position list of the entry starts at the short-aligned end of its
+ * string: a uint16 count followed by that many WordEntryPos items.  Returns
+ * true as soon as one position carries a weight whose bit is set in the
+ * query item's weight mask.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false; 
+}
+
+/*
+ * is there value 'val' in array or not ?
+ *
+ * Binary search over the WordEntry range of the CHKVAL passed as checkval,
+ * ordered by ValCompare().  On a match the result is true unless the query
+ * item has a weight mask and the entry has positions; in that case
+ * checkclass_str() decides.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   WordEntry  *StopLow = ((CHKVAL *) checkval)->arrb;
+   WordEntry  *StopHigh = ((CHKVAL *) checkval)->arre;
+   WordEntry  *StopMiddle;
+   int         difference;
+
+   /* Loop invariant: StopLow <= val < StopHigh */
+
+   while (StopLow < StopHigh)
+   {
+       StopMiddle = StopLow + (StopHigh - StopLow) / 2;
+       difference = ValCompare((CHKVAL *) checkval, StopMiddle, val);
+       if (difference == 0)
+           return ( val->weight && StopMiddle->haspos ) ? 
+               checkclass_str((CHKVAL *) checkval,StopMiddle, val) : true;
+       else if (difference < 0)
+           StopLow = StopMiddle + 1;
+       else
+           StopHigh = StopMiddle;
+   }
+
+   return (false);
+}
+
+/*
+ * Evaluate the boolean query tree rooted at curitem.
+ *
+ * VAL items are decided by the chkcond callback.  For operators, one
+ * operand is the item right after the operator and the other one sits at
+ * curitem + curitem->left.  When calcnot is false every '!' sub-tree counts
+ * as true without being evaluated.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   ITEM       *next = curitem + 1;
+   ITEM       *other;
+
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       return TS_execute(next, checkval, calcnot, chkcond) ? false : true;
+   }
+
+   other = curitem + curitem->left;
+
+   if (curitem->val == (int4) '&')
+   {
+       /* short-circuit: the second operand only matters if the first holds */
+       if (!TS_execute(other, checkval, calcnot, chkcond))
+           return false;
+       return TS_execute(next, checkval, calcnot, chkcond);
+   }
+
+   /* |-operator */
+   if (TS_execute(other, checkval, calcnot, chkcond))
+       return true;
+   return TS_execute(next, checkval, calcnot, chkcond);
+}
+
+/*
+ * boolean operations
+ */
+/* Same as exectsq() but with the two arguments given in reverse order. */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(1);
+   Datum       second = PG_GETARG_DATUM(0);
+
+   return DirectFunctionCall2(exectsq, first, second);
+}
+
+/*
+ * Match a tsvector (argument 0) against a query (argument 1).
+ * Returns false when either one is empty, otherwise the result of
+ * TS_execute() with NOT evaluation enabled.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   CHKVAL      chkval;
+   bool        result;
+
+   if (!val->size || !query->size)
+   {
+       PG_FREE_IF_COPY(val, 0);
+       PG_FREE_IF_COPY(query, 1);
+       PG_RETURN_BOOL(false);
+   }
+
+   /* describe the vector's entries and both string areas for the callback */
+   chkval.arrb = ARRPTR(val);
+   chkval.arre = chkval.arrb + val->size;
+   chkval.values = STRPTR(val);
+   chkval.operand = GETOPERAND(query);
+   result = TS_execute(
+                    GETQUERY(query),
+                    &chkval,
+                    true,
+                    checkcondition_str
+       );
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * find left operand in polish notation view
+ *
+ * Walks the item array recursively starting at ptr[*pos] and fills in the
+ * 'left' field of every item: 0 for values, 1 for '!', and for a binary
+ * operator the distance from the operator to its second operand.  On return
+ * *pos points just past the sub-tree that started at the original position.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+   if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
+   {
+       ptr[*pos].left = 0;
+       (*pos)++;
+   }
+   else if (ptr[*pos].val == (int4) '!')
+   {
+       ptr[*pos].left = 1;
+       (*pos)++;
+       findoprnd(ptr, pos);
+   }
+   else
+   {
+       ITEM       *curitem = &ptr[*pos];
+       int4        tmp = *pos;
+
+       (*pos)++;
+       /* skip the operand stored right after the operator ... */
+       findoprnd(ptr, pos);
+       /* ... the other operand starts here */
+       curitem->left = *pos - tmp;
+       findoprnd(ptr, pos);
+   }
+}
+
+
+/*
+ * input
+ *
+ * Parse the query string 'buf' into a palloc'd QUERYTYPE.
+ *
+ * pushval - callback that stores one operand (pushval_asis or pushval_morph)
+ * cfg_id  - configuration id kept in the parser state for the callback
+ *
+ * Raises an error when the query contains no items.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   /* the return value is not checked here; makepol reports errors via elog */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   /* the list starts with the most recently pushed node; each is freed once copied */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * Input function without morphology: operands are stored exactly as
+ * written (pushval_asis), with configuration id 0.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   char       *input = (char *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *parsed = queryin(input, pushval_asis, 0);
+
+   PG_RETURN_POINTER(parsed);
+}
+
+/*
+ * out function
+ */
+/* State of the infix printer (see infix()). */
+typedef struct
+{
+   /* next item of the polish-notation array to print */
+   ITEM       *curpol;
+   /* start of the output buffer */
+   char       *buf;
+   /* write position inside buf */
+   char       *cur;
+   /* operand text of the query; ITEM.distance is an offset into it */
+   char       *op;
+   /* allocated size of buf */
+   int4        buflen;
+}  INFIX;
+
+/*
+ * Make sure the output buffer of INFIX *inf has room for 'addsize' more
+ * bytes plus a terminator, doubling it with repalloc() as often as needed
+ * and re-deriving inf->cur afterwards.
+ */
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the sub-tree starting at in->curpol into in->buf and advances
+ * in->curpol past it.  'first' suppresses the parentheses that are
+ * otherwise put around an '|' expression.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       /* worst case: every char escaped, two quotes, ':' and four weight letters */
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           /* a quote inside the operand is printed with a leading backslash */
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       /* append the weight mask as ":ABCD" letters */
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       /* a negated operator expression is wrapped in parentheses */
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm;
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       /* the operand stored first is printed into a scratch buffer ... */
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function: returns the query in infix form as a palloc'd C string,
+ * or an empty string for a query without items.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+
+   if (query->size == 0)
+   {
+       char       *b = palloc(1);
+
+       *b = '\0';
+       PG_RETURN_POINTER(b);
+   }
+   nrm.curpol = GETQUERY(query);
+   nrm.buflen = 32;
+   nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+   *(nrm.cur) = '\0';
+   nrm.op = GETOPERAND(query);
+   infix(&nrm, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(nrm.buf);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * Returns a text value: empty for a query without items, "T" when
+ * clean_NOT_v2() returns NULL, otherwise the infix form of the tree that
+ * clean_NOT_v2() returned.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+   text       *res;
+   ITEM       *q;
+   int4        len;
+
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       PG_RETURN_POINTER(res);
+   }
+
+   /* presumably strips the NOT parts of the query - TODO confirm in rewrite.c */
+   q = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (!q)
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+   else
+   {
+       nrm.curpol = q;
+       nrm.buflen = 32;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+       *(nrm.cur) = '\0';
+       nrm.op = GETOPERAND(query);
+       infix(&nrm, true);
+
+       /* copy the printed text, without its terminator, into the text value */
+       res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ);
+       VARATT_SIZEP(res) = nrm.cur - nrm.buf + VARHDRSZ;
+       strncpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf);
+       pfree(q);
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * Build a query from text (argument 1) using the configuration whose id is
+ * given as int32 in argument 0.  Operands go through pushval_morph; the
+ * resulting tree is then passed to clean_fakeval_v2().  When that returns
+ * NULL an empty query (size 0) is returned.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text    *in = PG_GETARG_TEXT_P(1);
+   char *str;
+   QUERYTYPE  *query;
+   ITEM       *res;
+   int4        len;
+
+   str=text2char(in);
+   PG_FREE_IF_COPY(in,1);
+
+   query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+   /* presumably removes the VALTRUE placeholders pushed for stop words - TODO confirm */
+   res = clean_fakeval_v2(GETQUERY(query), &len);
+   if (!res)
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+       PG_RETURN_POINTER(query);
+   }
+   /* overwrite the item array with the len items returned */
+   memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM));
+   pfree(res);
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * to_tsquery() variant that takes the configuration by name (argument 0,
+ * text) instead of by id; argument 1 is the query text.
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+   
+   /* name was fetched from argument 0, so that is the slot to compare against */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsquery() variant that uses the configuration returned by
+ * get_currcfg(); argument 0 is the query text.
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum cfg = Int32GetDatum( get_currcfg() );
+   Datum res = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));
+
+   PG_RETURN_DATUM( res );
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * One node of a query stored in polish (prefix) notation, with a back
+ * link to the left operand.
+ */
+typedef struct ITEM
+{
+   int8        type;   /* node kind; code compares it against VAL, OPR, VALTRUE */
+   int8        weight;
+   int2        left;   /* binary operator: item offset from this node to its left operand */
+   int4        val;    /* operator node: the operator character ('!', '&' or '|') */
+   /* user-friendly value, must correlate with WordEntry */
+   uint32  
+       unused:1,
+       length:11,      /* byte length of the operand text */
+       distance:20;    /* offset of the operand text inside the operand area */
+}  ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ */
+typedef struct
+{
+   int4        len;    /* NOTE(review): presumably total length in bytes -- confirm */
+   int4        size;   /* number of ITEMs stored after the header */
+   char        data[1];    /* first byte of the ITEM array; operand text follows the items */
+}  QUERYTYPE;
+
+/* Size in bytes of the fixed QUERYTYPE header (the len and size fields). */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+/* Bytes needed for a query holding `size` items plus `lenofoperand` bytes of operand text. */
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+/* Pointer to the first ITEM of query x; fully parenthesised so it can be indexed directly. */
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+/* Pointer to the operand text area, which starts right after the ITEM array. */
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+/* True when character x is a query operator or a grouping parenthesis. */
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevance ranking
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+#define DEF_NORM_METHOD    0
+
+/*
+ * Weight of a collocation of two words that are w positions apart.
+ * Distances above 100 get a tiny constant weight.
+ */
+static float4 word_distance ( int4 w ) {
+   float4  weight = 1e-30;
+
+   if ( w<=100 )
+       weight = 1.0/(1.005+0.05*exp( ((float4)w)/1.5-2) );
+
+   return weight;
+}
+
+/*
+ * Sum of the position counts of all entries of t; an entry for which
+ * POSDATALEN() reports 0 is counted as one.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry   *cur, *stop=(WordEntry*)STRPTR(t);
+   int total = 0;
+
+   for(cur=ARRPTR(t); cur < stop; cur++) {
+       int npos = POSDATALEN(t, cur);
+
+       total += ( npos == 0 ) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Compare the text of vector entry ptr (stored in eval) with the operand
+ * of query item (stored in qval).  Different lengths order by length;
+ * equal lengths are decided by strncmp().
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+        if (ptr->len != item->length)
+                return (ptr->len > item->length) ? 1 : -1;
+
+        return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary search of the entry array of t for the operand of item.
+ * Returns the matching entry, or NULL when there is none.  The search
+ * relies on the entries being ordered consistently with
+ * WordECompareITEM().
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+        WordEntry  *lo = ARRPTR(t);
+        WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+        /* the candidate range is [lo, hi) */
+        while (lo < hi)
+        {
+                WordEntry  *mid = lo + (hi - lo) / 2;
+                int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+                if (cmp < 0)
+                        lo = mid + 1;
+                else if (cmp > 0)
+                        hi = mid;
+                else
+                        return mid;
+        }
+
+        return NULL;
+}
+
+/*
+ * Stand-in position list for entries that carry no position data.
+ * Users overwrite the first element with a uint16 count
+ * (lengthof(POSNULL)-1) before reading it, so only the second element
+ * serves as a position.
+ * NOTE(review): the field order of WordEntryPos is not visible here --
+ * presumably {weight, pos}; confirm against tsvector.h.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank of vector t against query q when the query's top operator is '&'.
+ *
+ * For every VAL item found in t, each of its positions is paired with
+ * each position of every earlier query item that was also found.  A pair
+ * contributes sqrt(weight1 * weight2 * word_distance(dist)), and the
+ * contributions are accumulated as 1 - (1-res)*(1-curw).
+ *
+ * w is the weight table indexed through wpos().  Returns -1.0 when no
+ * pair contributed.
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* pos[i] stays NULL for items that are operators or absent from t */
+   memset(pos,0,sizeof(uint16*) * q->size);
+   /* first POSNULL element doubles as the uint16 length prefix */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+
+       /* a position list is a uint16 count followed by the positions */
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       /* pair the current item with every previously located item */
+       for( k=0; k<i; k++ ) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   /* zero distance only counts when one side has no real positions */
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw;
+                       if ( !dist ) dist=MAXENTRYPOS;
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res;
+}
+
+/*
+ * Rank of vector t against query q for queries whose top operator is not
+ * '&'.  Every position of every VAL item found in t contributes its
+ * weight (looked up in w through wpos()); contributions are accumulated
+ * as 1 - (1-res)*(1-weight).  Returns -1.0 when nothing matched.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* first POSNULL element doubles as the uint16 length prefix */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           /* no position data: use the stand-in list */
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Compute the rank of vector t against query q using weight table w.
+ *
+ * Dispatches to calc_rank_and() when the first query item is the '&'
+ * operator and to calc_rank_or() otherwise; a negative result (nothing
+ * matched) is replaced by 1e-20.  The result is then normalised:
+ *   method 0 - no normalisation
+ *   method 1 - divide by log(document length)
+ *   method 2 - divide by document length
+ * Any other method raises an error.  Returns 0.0 for an empty vector or
+ * an empty query.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+   int len;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   if ( res < 0 )
+       res = 1e-20;
+
+   switch(method) {
+       case 0: break;
+       case 1:
+           /*
+            * log(1) is 0, so a document of length one would produce an
+            * infinite rank; leave such a document unnormalised.
+            */
+           len = cnt_length(t);
+           if ( len > 1 )
+               res /= log((float)len);
+           break;
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   return res;
+}
+
+/*
+ * SQL-callable rank with caller-supplied weights.
+ * Arguments: 0 - array of weights, 1 - tsvector, 2 - query,
+ * 3 (optional) - normalisation method.
+ * The array must be one-dimensional and hold at least
+ * lengthof(weights) elements.  A negative element selects the built-in
+ * default for that slot; a value above 1.0 is rejected.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 )
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+       elog(ERROR,"Array of weight is too short");
+
+   /* copy the weights, falling back to the defaults for negative entries */
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 )
+           elog(ERROR,"Weight out of range");
+   }
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method);
+
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * SQL-callable rank using the built-in weight table.
+ * Arguments: 0 - tsvector, 1 - query, 2 (optional) - normalisation method.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *qry = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int norm = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   float score = calc_rank(weights, vec, qry, norm);
+
+   PG_FREE_IF_COPY(vec, 0);
+   PG_FREE_IF_COPY(qry, 1);
+   PG_RETURN_FLOAT4(score);
+}
+
+
+/* One occurrence of a query item in the document: which item, at which position. */
+typedef struct {
+   ITEM    *item;
+   int32   pos;
+} DocRepresentation;
+
+/*
+ * qsort() comparator ordering DocRepresentation entries by position.
+ * Equal positions compare as equal (0): reporting "greater" for both
+ * argument orders would hand qsort() an inconsistent ordering.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+/* Window of len DocRepresentation entries starting at doc. */
+typedef struct {
+   DocRepresentation *doc;
+   int len;
+}  ChkDocR;
+
+/*
+ * Condition callback: true when query item val occurs anywhere in the
+ * ChkDocR window passed as checkval.
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *range = (ChkDocR*)checkval;
+   int idx;
+
+   for(idx=0; idx < range->len; idx++)
+       if ( range->doc[idx].item == val )
+           return true;
+
+   return false;
+}
+
+
+/*
+ * Find the next "cover" in doc[0..len): a span of positions [*p, *q]
+ * for which TS_execute() accepts the query when only the document
+ * entries inside the span are visible.
+ *
+ * *pos is the index in doc at which the search starts; on return it is
+ * advanced past the start of the span that was examined.  *q must hold
+ * the right edge found by the previous call (0 initially) so that a
+ * repeated right edge can be skipped.  Returns true when a cover was
+ * found, with *p and *q set to its first and last position.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   /* f is a dummy non-NULL marker until a left edge is found */
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* right edge: the largest among the first occurrences of each operand */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               }
+               break;
+           }
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   }
+
+   /* left edge: scanning back from the right edge, the smallest of the nearest occurrences */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+
+   if ( *p<=*q ) {
+       /* evaluate the query against the entries of the candidate span only */
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q);
+   }
+
+   return false;
+}
+
+/*
+ * Build the document representation used for cover ranking: one
+ * DocRepresentation per position of every VAL query item found in txt,
+ * sorted by position.
+ *
+ * *doclen receives the number of entries.  Returns a palloc'd array the
+ * caller must pfree, or NULL when no query item occurs in txt.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   /* first POSNULL element doubles as the uint16 length prefix */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           /* no position data: use the stand-in list */
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until the new positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+
+   if ( cur>0 ) {
+       if ( cur>1 )
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * SQL-callable cover-density rank.
+ * Arguments: 0 - K (non-positive values select 4), 1 - tsvector,
+ * 2 - query, 3 (optional) - normalisation method.
+ * Every cover [p,q] reported by Cover() adds 1.0 when it spans at most K
+ * positions and K/(q-p+1) otherwise.  Returns 0.0 when no query item
+ * occurs in the vector.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len,cur;
+   int doclen;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;
+   while( Cover(doc, len, query, &cur, &p, &q) )
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   switch(method) {
+       case 0: break;
+       case 1:
+           /*
+            * log(1) is 0, so a document of length one would yield an
+            * infinite or NaN rank; leave such a document unnormalised.
+            */
+           doclen = cnt_length(txt);
+           if ( doclen > 1 )
+               res /= log((float)doclen);
+           break;
+       case 2: res /= (float)cnt_length(txt); break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd() with K passed as -1.
+ * Arguments: 0 - tsvector, 1 - query, 2 (optional) - normalisation method.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum   method;
+
+   if ( PG_NARGS() == 3 )
+       method = PG_GETARG_DATUM(2);
+   else
+       method = Int32GetDatum(DEF_NORM_METHOD);
+
+   PG_RETURN_DATUM( DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   ));
+}
+
+/**************debug*************/
+
+/*
+ * One word occurrence of the document for the get_covers() debug output.
+ * start and finish hold the number of the cover that begins or ends at
+ * this word (0 when none does).
+ */
+typedef struct {
+   char    *w;
+   int2    len;
+   int2    pos;
+   int2    start;
+   int2    finish;
+} DocWord;
+
+/*
+ * qsort() comparator ordering DocWord entries by position.
+ * Equal positions compare as equal (0): reporting "greater" for both
+ * argument orders would hand qsort() an inconsistent ordering.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * Debug helper: return the words of the vector in position order as
+ * text, with every cover found by Cover() marked as "{N " before its
+ * first word and "}N " after its last word.
+ * Arguments: 0 - tsvector, 1 - query.
+ * Raises an error when any entry of the vector lacks position data and
+ * returns empty text when no query item occurs in the vector.
+ */
+Datum
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences; every entry must carry positions */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size budget */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;
+           dw[cur].len=pptr[i].len;
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* bounds are tested first so dwptr is never read past the array */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++;
+   }
+
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) {
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+typedef struct NODE
+{
+   struct NODE *left;
+   struct NODE *right;
+   ITEM       *valnode;
+}  NODE;
+
+/*
+ * Build a NODE tree from the flat (polish-notation) ITEM array that
+ * starts at in.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *n = (NODE *) palloc(sizeof(NODE));
+
+   n->valnode = in;
+   n->left = NULL;
+   n->right = NULL;
+
+   if (in->type != OPR)
+       return n;
+
+   /* the right operand immediately follows its operator */
+   n->right = maketree(in + 1);
+   /* '!' has a single operand; other operators locate the left one via in->left */
+   if (in->val != (int4) '!')
+       n->left = maketree(in + in->left);
+   return n;
+}
+
+/* Growable output buffer used while flattening a NODE tree. */
+typedef struct
+{
+   ITEM       *ptr;    /* item array being filled */
+   int4        len;    /* allocated capacity, in items */
+   int4        cur;    /* index of the next free slot */
+}  PLAINTREE;
+
+/*
+ * Append the subtree rooted at node to state in prefix order
+ * (node, right subtree, left subtree), recomputing each operator's
+ * `left` offset, and free the NODEs as they are emitted.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   /* double the buffer when it is full */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+   memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM));
+   if (node->valnode->type == VAL)
+       state->cur++;
+   else if (node->valnode->val == (int4) '!')
+   {
+       /* unary operator: its only operand is the next item */
+       state->ptr[state->cur].left = 1;
+       state->cur++;
+       plainnode(state, node->right);
+   }
+   else
+   {
+       /* remember this operator's slot: the buffer may move and cur advances */
+       int4        cur = state->cur;
+
+       state->cur++;
+       plainnode(state, node->right);
+       /* the left operand starts right after the emitted right subtree */
+       state->ptr[cur].left = state->cur - cur;
+       plainnode(state, node->left);
+   }
+   pfree(node);
+}
+
+/*
+ * Flatten a NODE tree back into a palloc'd ITEM array.
+ * *len receives the number of items written.  Returns NULL (with *len
+ * set to 0) when root is NULL or is neither a VAL nor an OPR node.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   state;
+
+   state.cur = 0;
+   state.len = 16;
+   state.ptr = NULL;
+
+   if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       state.ptr = (ITEM *) palloc(state.len * sizeof(ITEM));
+       plainnode(&state, root);
+   }
+
+   *len = state.cur;
+   return state.ptr;
+}
+
+/* Recursively pfree a NODE tree; a NULL node is ignored. */
+static void
+freetree(NODE * node)
+{
+   if (node)
+   {
+       freetree(node->left);
+       freetree(node->right);
+       pfree(node);
+   }
+}
+
+/*
+ * Clean the tree of ! operators.
+ * This is useful for debugging; otherwise the resulting view is used
+ * for searching in an index, where the ! operator always returns TRUE.
+ *
+ * Returns the cleaned subtree, or NULL when the whole subtree was
+ * removed.  Removed nodes are freed here.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   /* a negation is dropped together with everything below it */
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* an OR is removed as soon as either of its sides is removed */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       /* an AND collapses to whichever side survives */
+       NODE       *res = node;
+
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Build a tree from the item array at ptr, strip the ! operators from it
+ * and flatten the result; *len receives the number of items returned.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree = clean_NOT_intree(maketree(ptr));
+
+   return plaintree(tree, len);
+}
+
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Clean the query tree of values that are always present in the
+ * text (stopwords).
+ *
+ * Returns the cleaned subtree.  When the subtree is removed entirely,
+ * NULL is returned and *result is set to V_TRUE or V_FALSE, the constant
+ * value the removed subtree stands for; otherwise *result is left as the
+ * caller initialised it.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       /* a stop-word placeholder is always true */
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* the operand became a constant: negate it */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       /* OR: one true side makes the whole node true */
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           /* a false side drops out, leaving the other side */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       /* remaining operator (AND): one false side makes the whole node false */
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           /* a true side drops out, leaving the other side */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Build a tree from the item array at ptr, remove the stop-word
+ * placeholders and flatten the result.  When the whole query reduces to
+ * a constant, a NOTICE is emitted, *len is set to 0 and NULL is
+ * returned; otherwise *len receives the number of items returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &verdict);
+
+   if (verdict == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/* qsort()/bsearch() comparator: order SNMapEntry items by key with strcmp(). */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *lhs = (const SNMapEntry*)a;
+   const SNMapEntry *rhs = (const SNMapEntry*)b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add (key, value) to the map.  The key is duplicated with strdup(), the
+ * list grows by doubling (starting at 16 slots), and the list is
+ * re-sorted after every insertion so that findSNMap() can bsearch() it.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if (map->len>=map->reallen) {
+       int newcap = (map->reallen) ? 2*map->reallen : 16;
+       SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newcap);
+
+       if ( !grown )
+           elog(ERROR, "No memory");
+       map->reallen=newcap;
+       map->list=grown;
+   }
+
+   slot = &(map->list[ map->len ]);
+   slot->key = strdup(key);
+   if ( ! slot->key )
+       elog(ERROR, "No memory");
+   slot->value=value;
+   map->len++;
+
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/* addSNMap() for a text key: converted with text2char() and released afterwards. */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey;
+
+   ckey = text2char( key );
+   addSNMap(map, ckey, value);
+   pfree(ckey);
+}
+
+/* Look key up in the map; returns its value, or 0 when it is absent or the map is empty. */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if ( !map->list || map->len==0 )
+       return 0;
+
+   probe.key = key;
+   probe.value = 0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( !hit )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * findSNMap() for a text key: the key is converted with text2char() and
+ * released afterwards.  Returns the stored value, or 0 when not found.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* kept as Oid so the value does not pass through a signed int */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/* Release every key and the entry list, then reset the map to all zeroes. */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *cur;
+
+       for( cur=map->list; map->len; cur++, map->len-- ) {
+           if ( cur->key )
+               free(cur->key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+typedef struct {
+   char    *key;
+   Oid value;
+} SNMapEntry;
+
+typedef struct {
+   int len;
+   int reallen;
+   SNMapEntry  *list;
+} SNMap;
+
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a zeroed stemmer environment with S_size strings, I_size
+ * integers and B_size booleans.  Allocation results are not checked.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{
+    struct SN_env * env = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    int idx;
+
+    env->p = create_s();
+
+    if (S_size)
+    {
+        env->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        for (idx = 0; idx < S_size; idx++)
+            env->S[idx] = create_s();
+        env->S_size = S_size;
+    }
+
+    if (I_size)
+    {
+        env->I = (int *) calloc(I_size, sizeof(int));
+        env->I_size = I_size;
+    }
+
+    if (B_size)
+    {
+        env->B = (symbol *) calloc(B_size, sizeof(symbol));
+        env->B_size = B_size;
+    }
+
+    return env;
+}
+
+/* Release everything owned by the environment z, then z itself. */
+extern void SN_close_env(struct SN_env * z)
+{
+    int idx;
+
+    if (z->S_size)
+    {
+        for (idx = 0; idx < z->S_size; idx++)
+            lose_s(z->S[idx]);
+        free(z->S);
+    }
+    if (z->I_size)
+        free(z->I);
+    if (z->B_size)
+        free(z->B);
+    if (z->p)
+        lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Load the size symbols at s into the environment and reset the cursor.
+ * NOTE(review): replace_s() is defined elsewhere -- presumably it
+ * replaces the range 0..z->l of the current string; confirm.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+struct SN_env {
+    symbol * p;
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;
+    symbol * * S;
+    int * I;
+    symbol * B;
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z); /* public entry point; the static r_* routines are its building blocks */
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void); /* thin wrapper over SN_create_env() */
+extern void english_close_env(struct SN_env * z); /* thin wrapper over SN_close_env() */
+/* a_0: the single prefix "gener", tried with find_among() in r_mark_regions. */
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+/* a_1: suffixes for r_Step_1a (find_among_b); the 4th field is the switch case there, -1 matches no case. */
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+/* a_2: endings tested in r_Step_1b after a suffix is deleted; entry 0 is the empty string, hence no s_2_0. */
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+/* a_3: suffixes for r_Step_1b; the 3rd field (substring_i) points at a shorter entry, e.g. "eed" -> 0 ("ed"). */
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+/* a_4: suffixes for r_Step_2; result codes 1..16 select the replacement in its switch. */
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+/* a_5: suffixes for r_Step_3; result codes 1..6 select the action in its switch. */
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+/* a_6: suffixes for r_Step_4; result 1 = delete, result 2 ("ion") = delete only after 's' or 't'. */
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+/* a_7: the final letters handled by r_Step_5 ('e' -> case 1, 'l' -> case 2). */
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+/* a_8: words recognised by r_exception2; every result is -1 because only "matched or not" is used. */
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+/* a_9: whole words looked up forwards by r_exception1; results 1..11 pick a fixed stem, -1 keeps the word. */
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+/* g_v is passed with range 97..121; if bit k of byte j means code 97+8*j+k it holds a e i o u y - confirm in utilities.c */
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+/* g_v_WXY is passed with range 89..121; under the same assumed layout: Y a e i o u w x y */
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+/* g_valid_LI is passed with range 99..116; under the same assumed layout: c d e g h k m n r t */
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+/* s_0 .. s_48: literal strings for eq_s/eq_s_b tests and slice_from_s/insert_s replacements in the routines below. */
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+/* Prelude: clears B[0] (Y_found), then turns selected 'y' into 'Y', setting B[0] on each rewrite. Always returns 1. */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c; /* the cursor is restored whether or not the rewrite happened */
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2; /* ran out of input: leave the repeat loop */
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1: /* emitted by the generator; no goto in this function targets it */
+        z->c = c;
+    }
+    return 1;
+}
+/* Sets marks I[0] (p1) and I[1] (p2); both stay at z->l if the scan runs out of input. Cursor restored; returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1; /* a_0 ("gener") matched: p1 goes right after it */
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+/* Backward test used by Step_1b and Step_5: returns 1 if either of the two grouping patterns below matches, else 0. */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m; /* first pattern failed: rewind and try the second */
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+/* R1 test: 1 when the cursor is at or past mark p1 (I[0]), otherwise 0. */
+static int r_R1(struct SN_env * z) {
+    /* a relational expression already yields exactly 0 or 1 */
+    return z->I[0] <= z->c;
+}
+/* R2 test: 1 when the cursor is at or past mark p2 (I[1]), otherwise 0. */
+static int r_R2(struct SN_env * z) {
+    /* a relational expression already yields exactly 0 or 1 */
+    return z->I[1] <= z->c;
+}
+/* Step 1a: handles the a_1 suffixes (sses, ied/ies, s; ss and us are left alone). Returns 0 if nothing applies. */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1; /* nothing before the suffix */
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0; /* exactly one symbol preceded the suffix: it became "ie" */
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+/* Step 1b: a_3 suffixes. Case 1 rewrites to "ee" inside R1; case 2 deletes the suffix and then repairs the new ending via a_2. */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0; /* no g_v symbol before the suffix: give up */
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+/* Step 1c: a final 'y' or 'Y' becomes 'i' when out_grouping_b(g_v) succeeds before it and the cursor is then not at lb. */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0; /* cursor reached the backward limit: the negated test fails */
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+/* Step 2: the a_4 suffix found before the cursor must pass r_R1; it is then rewritten according to its result code. */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0; /* "ogi" is only rewritten after an 'l' */
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0; /* "li" is only dropped after a g_valid_LI symbol */
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+/* Step 3: the a_5 suffix must pass r_R1; it is rewritten or deleted by result code, and case 6 also needs r_R2. */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+/* Step 4: the a_6 suffix must pass r_R2 and is deleted; case 2 ("ion") additionally needs 's' or 't' before it. */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m; /* not 's': rewind and try 't' */
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+/* Step 5: drops a final 'e' (if R2, or R1 and not shortv) or a final 'l' (if R2 and another 'l' precedes it). */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0; /* shortv matched, so the negated test fails */
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+/* Exception list 2: 1 only if an a_8 word ends at the cursor and its match reaches back to the limit lb. */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (find_among_b(z, a_8, 8) == 0) return 0; /* substring, line 147 */
+    z->bra = z->c; /* ], line 147 */
+    /* atlimit, line 147: the match must have consumed everything down to lb */
+    return z->c <= z->lb;
+}
+/* Exception list 1: if the text from the cursor to z->l is an a_9 word, apply its fixed rewrite (if any) and return 1. */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) { /* entries with result -1 match no case: the word is kept as is */
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+/* Postlude: returns 0 at once if B[0] (Y_found) is unset; otherwise rewrites each 'Y' from the cursor on to 'y'. */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0; /* no further 'Y': leave the repeat loop */
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+/* Stems the string in z in place. Returns 1 normally; returns 0 untouched if fewer than 3 symbols follow the cursor. */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0; /* the word was on exception list 1: nothing more to do */
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5; /* exception list 2 matched: Steps 1b..5 are skipped */
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb; /* back to forward mode, cursor at the position saved in lb */
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+/* Environment sizes for this stemmer: no strings, two integers (I[0], I[1] = marks p1, p2), one flag (B[0] = Y_found). */
+extern struct SN_env * english_create_env(void) { return SN_create_env(0, 2, 1); }
+/* Releases an environment; simply forwards to SN_close_env(). */
+extern void english_close_env(struct SN_env * z) { SN_close_env(z); }
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * english_create_env(void); /* struct SN_env is not declared in this header - presumably api.h must be included first */
+extern void english_close_env(struct SN_env * z); /* counterpart of english_create_env() */
+
+extern int english_stem(struct SN_env * z); /* stems the current string of z in place */
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+/* Internal declarations shared by the Snowball runtime and the generated stemmers. */
+#include <limits.h>
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+#define HEAD (2*sizeof(int)) /* size of the two ints that SIZE() and CAPACITY() read just before p */
+
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    ((int *)(p))[-2]
+/* One entry of a search table handed to find_among() / find_among_b(). */
+struct among
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    int (* function)(struct SN_env *); /* 0 in every table visible in this patch section; presumably an optional extra test - confirm in utilities.c */
+};
+/* Runtime helpers; their definitions are not in this header (presumably utilities.c), so notes below are limited to what callers show. */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+/* Grouping tests: callers pass a table s plus a min and max character code; the *_b forms are the ones used in backward mode. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+/* String tests: eq_s/eq_s_b take an explicit length plus literal; eq_v/eq_v_b take a symbol string p. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+/* Table lookup: the generated stemmers treat a 0 return as "no match" and otherwise switch on the value. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point of the stemmer. */
+extern int russian_stem(struct SN_env * z);
+/* Internal routines; each returns 1 on success (signal t) and 0 on failure. */
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Environment constructor / destructor for this stemmer. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* a_0: endings searched backwards by r_perfective_gerund.
+   Each among entry is { s_size, s, substring_i, result, function }: s_size
+   symbols at s, substring_i = index of the longest other entry that is also a
+   match (-1 if none), result = value returned by find_among_b on a match,
+   function = optional extra condition (0 = none).
+   NOTE(review): the byte values look like KOI8-R Cyrillic - confirm. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* a_1: endings searched backwards by r_adjective. Every entry has result 1
+   and no shorter fallback (substring_i == -1). */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* a_2: endings searched backwards by r_adjectival after r_adjective has
+   succeeded. Result 1 endings need a preceding s_2/s_3 symbol to be deleted;
+   result 2 endings are deleted unconditionally. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* a_3: the two endings searched backwards by r_reflexive. */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* a_4: endings searched backwards by r_verb. Result 1 endings need a
+   preceding s_4/s_5 symbol to be deleted; result 2 endings are deleted
+   unconditionally. */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* a_5: endings searched backwards by r_noun. Every entry has result 1; the
+   substring_i links let find_among_b fall back to a shorter ending. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* a_6: the two endings searched backwards by r_derivational. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* a_7: endings searched backwards by r_tidy_up; the result (1, 2 or 3)
+   selects which clean-up action r_tidy_up performs. */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* g_v: grouping bitmap used by r_mark_regions with the range 192..220
+   (bit (ch - 192) set means ch belongs to the group). */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* Single-symbol literals compared with eq_s_b:
+   s_0/s_1 in r_perfective_gerund, s_2/s_3 in r_adjectival, s_4/s_5 in r_verb,
+   s_6/s_7/s_8 in r_tidy_up, s_9 in russian_stem. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the two region marks z->I[0] (pV) and z->I[1] (p2).
+   Both default to z->l. Scanning forward from the cursor, I[0] becomes the
+   position just past the first g_v character; I[1] the position reached after
+   then going past a non-g_v character, a g_v character and another non-g_v
+   character. If the text runs out first, the marks found so far are kept.
+   The cursor is restored before returning; always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 100 */
+        while(1) { /* gopast, line 101 */
+            /* in_grouping consumes the character when it matches */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+            break;
+        lab1:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[0] = z->c; /* setmark pV, line 101 */
+        while(1) { /* gopast, line 101 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+            break;
+        lab2:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+            break;
+        lab3:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+            break;
+        lab4:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 102 */
+    lab0:
+        /* the enclosing 'do' always puts the cursor back */
+        z->c = c;
+    }
+    return 1;
+}
+
+/* Succeeds (1) when the cursor is at or beyond the p2 mark stored in z->I[1],
+   otherwise fails (0). */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/* Perfective gerund step (backward mode): look for one of the a_0 endings
+   immediately before the cursor and delete it.
+   Result 1 endings are deleted only if the symbol before them is s_0 or s_1;
+   result 2 endings are deleted unconditionally.
+   Returns 1 when an ending was deleted, 0 otherwise (the cursor is not
+   restored on failure; the caller does that). */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 111 */
+    among_var = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 111 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 115 */
+                if (!(eq_s_b(z, 1, s_0))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_1))) return 0;
+            }
+        lab0:
+            /* only bra..ket (the ending) is removed, not the tested symbol */
+            slice_del(z); /* delete, line 115 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 122 */
+            break;
+    }
+    return 1;
+}
+
+/* Adjective step (backward mode): delete an a_1 ending found immediately
+   before the cursor. Returns 1 on a match, 0 when no ending matches. */
+static int r_adjective(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;                          /* [ */
+    among_var = find_among_b(z, a_1, 26);   /* substring */
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* ] */
+
+    if (among_var == 1) slice_del(z);       /* delete */
+    return 1;
+}
+
+/* Adjectival step (backward mode): an adjective ending (r_adjective) must be
+   removed first; then, optionally ("try"), a participle ending from a_2 is
+   removed as well. Result 1 participle endings are deleted only when preceded
+   by s_2 or s_3; result 2 endings are deleted unconditionally.
+   Returns 0 only when r_adjective fails, otherwise 1.
+
+   The inner "or" mark is named m_or so that it does not shadow the mark m of
+   the enclosing "try": when the "or" fails, the cursor must go back to where
+   the "try" started, not to where the "or" started. */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m; goto lab0; }
+            case 1:
+                {   int m_or = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m_or;
+                    /* both alternatives failed: undo the whole "try" */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Reflexive step (backward mode): delete an a_3 ending found immediately
+   before the cursor. Returns 1 on a match, 0 when no ending matches. */
+static int r_reflexive(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;                         /* [ */
+    among_var = find_among_b(z, a_3, 2);   /* substring */
+    if (among_var == 0) return 0;
+    z->bra = z->c;                         /* ] */
+
+    if (among_var == 1) slice_del(z);      /* delete */
+    return 1;
+}
+
+/* Verb step (backward mode): look for one of the a_4 endings immediately
+   before the cursor and delete it.
+   Result 1 endings are deleted only if the symbol before them is s_4 or s_5;
+   result 2 endings are deleted unconditionally.
+   Returns 1 when an ending was deleted, 0 otherwise (the cursor is not
+   restored on failure; the caller does that). */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 176 */
+    among_var = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 176 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 182 */
+                if (!(eq_s_b(z, 1, s_4))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_5))) return 0;
+            }
+        lab0:
+            /* only bra..ket (the ending) is removed, not the tested symbol */
+            slice_del(z); /* delete, line 182 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 190 */
+            break;
+    }
+    return 1;
+}
+
+/* Noun step (backward mode): delete an a_5 ending found immediately before
+   the cursor. Returns 1 on a match, 0 when no ending matches. */
+static int r_noun(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;                          /* [ */
+    among_var = find_among_b(z, a_5, 36);   /* substring */
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* ] */
+
+    if (among_var == 1) slice_del(z);       /* delete */
+    return 1;
+}
+
+/* Derivational step (backward mode): delete an a_6 ending found immediately
+   before the cursor, provided the ending starts at or after the p2 mark
+   (r_R2). Returns 1 when the ending was accepted, 0 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;                         /* [ */
+    among_var = find_among_b(z, a_6, 2);   /* substring */
+    if (among_var == 0) return 0;
+    z->bra = z->c;                         /* ] */
+
+    if (!r_R2(z)) return 0;                /* call R2 */
+    if (among_var == 1) slice_del(z);      /* delete */
+    return 1;
+}
+
+/* Tidy-up step (backward mode), driven by the a_7 ending before the cursor:
+     result 1: delete the ending; then, if the text now ends in two s_6/s_7
+               symbols, delete the last one of them;
+     result 2: the ending is a single symbol; delete it only if the symbol
+               before it is s_8;
+     result 3: delete the ending.
+   Returns 0 when nothing matches or a required preceding symbol is missing
+   (for result 1 the ending has already been deleted at that point),
+   otherwise 1. */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 223 */
+    among_var = find_among_b(z, a_7, 4); /* substring, line 223 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 223 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 227 */
+            z->ket = z->c; /* [, line 228 */
+            if (!(eq_s_b(z, 1, s_6))) return 0;
+            z->bra = z->c; /* ], line 228 */
+            /* the new slice covers only the last symbol; a second one must precede it */
+            if (!(eq_s_b(z, 1, s_7))) return 0;
+            slice_del(z); /* delete, line 228 */
+            break;
+        case 2:
+            if (!(eq_s_b(z, 1, s_8))) return 0;
+            slice_del(z); /* delete, line 231 */
+            break;
+        case 3:
+            slice_del(z); /* delete, line 233 */
+            break;
+    }
+    return 1;
+}
+
+/* Stem the word held in z (z->p, cursor z->c, limit z->l) in place.
+   Steps:
+     1. r_mark_regions computes the marks z->I[0] (pV) and z->I[1] (p2).
+     2. Processing switches to backward mode with the backward limit raised
+        to pV, so endings are only matched after that mark.
+     3. Either a perfective gerund ending is removed, or an optional
+        reflexive ending followed by one of adjectival / verb / noun.
+     4. An optional trailing s_9 symbol is removed.
+     5. r_derivational and r_tidy_up are applied; their failure is ignored.
+   Returns 0 only if the cursor is before pV when the limit is set, else 1.
+   Each nested block declares its own mark m; inner declarations shadow the
+   outer ones, so every "z->c = z->l - m" refers to the innermost m in scope. */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        /* remember the old backward limit in m3 and use pV as the new one */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            /* end of the 'do': the cursor is reset whether or not it succeeded */
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        /* restore the backward limit saved above */
+        z->lb = m3;
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create the environment used by russian_stem.
+   NOTE(review): the arguments 0, 2, 0 presumably request two integer slots
+   (z->I[0] and z->I[1] are used here) - confirm against SN_create_env. */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Release an environment obtained from russian_create_env. */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create / destroy the environment that russian_stem operates on. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Stem the word held in z in place. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* unless(C) stmt;  runs stmt when C is false. */
+#define unless(C) if(!(C))
+
+/* Capacity and initial size, in symbols, of a buffer made by create_s. */
+#define CREATE_SIZE 1
+
+/* Allocate a fresh symbol buffer with capacity and size CREATE_SIZE.
+   The returned pointer addresses the symbols; the bookkeeping header occupies
+   the HEAD bytes in front of it. Release the buffer with lose_s.
+   Returns 0 if the allocation fails (previously the null result of malloc was
+   offset and written through, which is undefined behaviour). */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == 0) return 0;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Free a buffer obtained from create_s / increase_size. The allocation
+   starts HEAD bytes before the symbol pointer. */
+extern void lose_s(symbol * p)
+{
+    char * base = (char *) p - HEAD;
+    free(base);
+}
+
+/* Forward test: succeed (return 1, advance the cursor by one) when the symbol
+   at the cursor lies in min..max and its bit is set in the bitmap s;
+   otherwise return 0 and leave the cursor alone. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: succeed (return 1, step the cursor back by one) when the
+   symbol before the cursor lies in min..max and its bit is set in the bitmap
+   s; otherwise return 0 and leave the cursor alone. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: succeed (return 1, advance the cursor by one) when the symbol
+   at the cursor is NOT in the grouping, i.e. it lies outside min..max or its
+   bit in the bitmap s is clear; otherwise return 0. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max)
+    {
+        ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward test: succeed (return 1, step the cursor back by one) when the
+   symbol before the cursor is NOT in the grouping, i.e. it lies outside
+   min..max or its bit in the bitmap s is clear; otherwise return 0. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max)
+    {
+        ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward test: succeed (return 1, advance the cursor) when the symbol at the
+   cursor lies in min..max; otherwise return 0. */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch < min || ch > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: succeed (return 1, step the cursor back) when the symbol
+   before the cursor lies in min..max; otherwise return 0. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch < min || ch > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: succeed (return 1, advance the cursor) when the symbol at the
+   cursor lies outside min..max; otherwise return 0. */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (min <= ch && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: succeed (return 1, step the cursor back) when the symbol
+   before the cursor lies outside min..max; otherwise return 0. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (min <= ch && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward literal match: if the s_size symbols at s occur at the cursor
+   (and fit before the limit z->l), move the cursor past them and return 1;
+   otherwise return 0. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->l - z->c < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward literal match: if the s_size symbols at s occur immediately before
+   the cursor (and fit after the backward limit z->lb), move the cursor in
+   front of them and return 1; otherwise return 0. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->c - z->lb < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward match against the whole symbol buffer p (length SIZE(p)). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int n = SIZE(p);
+    return eq_s(z, n, p);
+}
+
+/* Backward match against the whole symbol buffer p (length SIZE(p)). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int n = SIZE(p);
+    return eq_s_b(z, n, p);
+}
+
+/* Forward search of the among table v (v_size entries) at the cursor.
+   Phase 1 is a binary search over v that compares the text at the cursor
+   with each probed entry and keeps, for the lower (i) and upper (j) bound,
+   how many leading symbols are already known to match (common_i/common_j).
+   Phase 2 starts from entry i and follows substring_i links until an entry is
+   fully matched (and its optional function, if any, succeeds).
+   On success the cursor is placed after the matched entry and its result is
+   returned; 0 is returned when no entry matches. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        /* this inner i is a separate index into w->s; it shadows the bound i */
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the condition may have moved the cursor; put it back */
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        /* fall back to the next shorter candidate, if there is one */
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/* Backward search of the among table v (v_size entries): entries are compared
+   from their last symbol towards their first against the text that ends at
+   the cursor, never reading before the backward limit z->lb.
+   Phase 1 is a binary search over v; phase 2 follows substring_i links from
+   entry i until an entry is fully matched (and its optional function, if any,
+   succeeds). On success the cursor is placed in front of the matched entry
+   and its result is returned; 0 is returned when no entry matches. */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        /* this inner i is a separate index into w->s; it shadows the bound i */
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the condition may have moved the cursor; put it back */
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        /* fall back to the next shorter candidate, if there is one */
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Replace buffer p by a new one with room for n + 20 symbols. CAPACITY(p)
+   symbols are copied over and p is released. The size field of the new
+   buffer is not set here; callers set it afterwards with SET_SIZE. */
+extern symbol * increase_size(symbol * p, int n)
+{
+    int grown = n + 20;
+    char * raw = (char *) malloc(HEAD + (grown + 1) * sizeof(symbol));
+    symbol * q = (symbol *) (HEAD + raw);
+
+    CAPACITY(q) = grown;
+    memmove(q, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return q;
+}
+
+/* Replace the symbols z->p[c_bra .. c_ket) by the s_size symbols at s.
+   When the length changes, the buffer is grown if necessary, the tail is
+   shifted, and the size, the limit z->l and the cursor z->c are adjusted
+   (a cursor inside the replaced span is moved to c_bra).
+   Returns the change in length (new minus old). */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{
+    int delta = s_size - (c_ket - c_bra);
+    int old_len = SIZE(z->p);
+
+    if (delta != 0)
+    {
+        int new_len = delta + old_len;
+
+        if (new_len > CAPACITY(z->p)) z->p = increase_size(z->p, new_len);
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        if (z->c >= c_ket)
+            z->c += delta;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/* Verify 0 <= bra <= ket <= l <= SIZE(p) before a slice operation. On a
+   violation, report to stderr, dump the buffer via debug and exit(1). */
+static void slice_check(struct SN_env * z)
+{
+    if (z->bra < 0 ||
+        z->bra > z->ket ||
+        z->ket > z->l ||
+        z->l > SIZE(z->p))   /* this last test could be removed */
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+/* Replace the current slice z->bra..z->ket by the s_size symbols at s,
+   after checking that the slice is valid. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the contents of the symbol buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int n = SIZE(p);
+    slice_from_s(z, n, p);
+}
+
+/* Delete the current slice (replace it by zero symbols). */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, 0);
+}
+
+/* Replace z->p[bra .. ket) by the s_size symbols at s and shift the slice
+   marks z->bra / z->ket by the length change when they lie at or after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* Replace z->p[bra .. ket) by the contents of the symbol buffer p, shifting
+   the slice marks as insert_s does. Delegates to insert_s instead of
+   repeating its body, in the same way eq_v and slice_from_v delegate to their
+   _s counterparts. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{   insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice z->bra..z->ket into p (growing p if its capacity is
+   too small) and set p's size to the slice length. Returns the buffer to use
+   from now on, which differs from the argument when p had to grow. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int count;
+
+    slice_check(z);
+    count = z->ket - z->bra;
+    if (CAPACITY(p) < count) p = increase_size(p, count);
+    memmove(p, z->p + z->bra, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into p (growing p if its capacity is
+   too small) and set p's size accordingly. Returns the buffer to use from
+   now on, which differs from the argument when p had to grow. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int count = z->l;
+
+    if (CAPACITY(p) < count)
+        p = increase_size(p, count);
+    memmove(p, z->p, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/* Dump the buffer of z to stdout with position markers:
+   '{' backward limit, '[' bra, '|' cursor, ']' ket, '}' limit.
+   Zero symbols are shown as '#'. The "number (line n): [size]'" prefix is
+   printed only when number >= 0; the closing quote is always printed. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{
+    int limit = SIZE(z->p);
+    int pos;
+
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+    for (pos = 0; pos <= limit; pos++)
+    {
+        if (pos == z->lb) printf("{");
+        if (pos == z->bra) printf("[");
+        if (pos == z->c) printf("|");
+        if (pos == z->ket) printf("]");
+        if (pos == z->l) printf("}");
+        if (pos < limit)
+        {
+            int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case the NUL-terminated string str in place using tolower() and
+ * return the same pointer.  Bytes are passed to tolower() as unsigned char.
+ */
+char*
+lowerstr(char *str) {
+    unsigned char *cur;
+
+    for(cur=(unsigned char*)str; *cur!='\0'; cur++)
+        *cur = (unsigned char)tolower(*cur);
+    return str;
+}
+
+/*
+ * Release every word owned by the stop list, then the pointer array itself,
+ * and finally zero the whole StopList structure (this also clears wordop).
+ *
+ * The array is released exactly once, after all entries have been freed.
+ * Freeing it inside the per-entry loop would make the next iteration read
+ * freed memory and free the array again, and would leak the array when the
+ * list is allocated but empty.
+ */
+void
+freestoplist(StopList *s) {
+    if ( s->stop ) {
+        int i;
+
+        /* free(NULL) is a no-op, so unset slots are harmless */
+        for(i=0; i<s->len; i++)
+            free(s->stop[i]);
+        free(s->stop);
+    }
+    memset(s,0,sizeof(StopList));
+}
+
+/* Release a partially built word array after a failure in readstoplist(). */
+static void
+discard_partial_stoplist(char **stop, int len) {
+    int i;
+
+    if ( !stop )
+        return;
+    for(i=0; i<len; i++)
+        free(stop[i]);
+    free(stop);
+}
+
+/*
+ * Load a stop-word list from the file named by the text value `in`, one word
+ * per line, into s->stop / s->len.  Empty lines are skipped.  When s->wordop
+ * is set it is applied to each word after it has been duplicated.
+ *
+ * If `in` is NULL or empty the list is left empty (s->stop = NULL, s->len = 0).
+ * Raises elog(ERROR) when the file cannot be opened or memory runs out; in
+ * the out-of-memory case everything collected so far is released first.
+ */
+void
+readstoplist(text *in, StopList *s) {
+    char **stop=NULL;
+
+    s->len=0;
+    if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+        char *filename=text2char(in);
+        FILE    *hin=NULL;
+        char    buf[STOPBUFLEN];
+        int reallen=0;
+
+        if ( (hin=fopen(filename,"r")) == NULL )
+            elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+        while( fgets(buf,STOPBUFLEN,hin) ) {
+            size_t buflen=strlen(buf);
+
+            /*
+             * Strip the line terminator only when it is really there: the
+             * last line of a file may lack one, and chopping blindly would
+             * eat the final letter of that word (or write before buf when
+             * the string is empty).
+             */
+            if ( buflen>0 && buf[buflen-1]=='\n' )
+                buf[--buflen] = '\0';
+            if ( buflen==0 ) continue;
+
+            if ( s->len>= reallen ) {
+                char **tmp;
+                reallen=(reallen) ? reallen*2 : 16;
+                tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+                if (!tmp) {
+                    /* s->stop does not reference this array yet, so free it here */
+                    discard_partial_stoplist(stop, s->len);
+                    s->stop=NULL;
+                    s->len=0;
+                    freestoplist(s);
+                    fclose(hin); 
+                    elog(ERROR,"Not enough memory");
+                }
+                stop=tmp;
+            }
+    
+            stop[s->len]=strdup(buf);
+            if ( !stop[s->len] ) {
+                discard_partial_stoplist(stop, s->len);
+                s->stop=NULL;
+                s->len=0;
+                freestoplist(s);
+                fclose(hin); 
+                elog(ERROR,"Not enough memory");
+            }
+            if ( s->wordop ) 
+                stop[s->len]=(s->wordop)(stop[s->len]);
+
+            (s->len)++; 
+        }
+        fclose(hin);
+        pfree(filename); 
+    }
+    s->stop=stop;
+} 
+
+/* qsort()/bsearch() comparator for arrays of char*: orders entries by strcmp(). */
+static int
+comparestr(const void *a, const void *b) {
+    const char *lhs = *(char**)a;
+    const char *rhs = *(char**)b;
+
+    return strcmp(lhs, rhs);
+}
+
+/* Sort the stop list in strcmp() order so that searchstoplist() can bsearch() it. */
+void
+sortstoplist(StopList *s) {
+    if ( !s->stop || s->len<=0 )
+        return;
+    qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is present in the stop list.  If s->wordop is set it
+ * is applied to key first.  The lookup is a bsearch() with comparestr(), so
+ * the list must already be in that order (see sortstoplist()).
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+    if ( s->wordop ) 
+        key=(*(s->wordop))(key);
+    if ( !s->stop || s->len<=0 )
+        return false;
+    return ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) != NULL ) ? true : false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+/*
+ * Saved SPI plans, prepared lazily on first use and then reused.
+ * Each stays NULL until the function that needs it has prepared it.
+ */
+/* get_currcfg(): look up a configuration oid by locale name */
+static void *plan_getcfg_bylocale=NULL;
+/* init_cfg(): fetch prs_name of a configuration by oid */
+static void *plan_getcfg=NULL;
+/* init_cfg(): fetch the token-type -> dictionary-names map of a configuration */
+static void *plan_getmap=NULL;
+/* name2id_cfg(): resolve a configuration name to its oid */
+static void *plan_name2id=NULL;
+/* cached current configuration id; 0 means "not determined yet" (see get_currcfg) */
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the description of the tsearch configuration whose oid is id.
+ *
+ * Two SPI queries are run:
+ *   1. pg_ts_cfg gives the parser name of the configuration;
+ *   2. pg_ts_cfgmap joined with token_type(parser) gives, per token type id,
+ *      the array of dictionary names, ordered by token id descending.
+ * Because of that ordering the first row carries the largest token id, which
+ * sizes cfg->map (cfg->len = largest id + 1).
+ *
+ * Names are copied into TopMemoryContext while SPI is connected and are
+ * resolved to ids only after SPI_finish(), via name2id_prs()/name2id_dict().
+ * Once resolved, each temporary name copy is pfree'd.
+ *
+ * cfg->map and the dict_id arrays are malloc'ed.  Problems are reported
+ * through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+    Oid arg[2]={ OIDOID, OIDOID };
+    bool isnull;
+    Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+    int stat,i,j;
+    text *ptr;
+    text *prsname=NULL;
+    MemoryContext   oldcontext;
+
+    memset(cfg,0,sizeof(TSCfgInfo));
+    SPI_connect();
+    if ( !plan_getcfg ) {
+        plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+        if ( !plan_getcfg ) 
+            ts_error(ERROR, "SPI_prepare() failed");
+    }
+
+    stat = SPI_execp(plan_getcfg, pars, " ", 1);
+    if ( stat < 0 )
+        ts_error (ERROR, "SPI_execp return %d", stat);
+    if ( SPI_processed > 0 ) {
+        prsname = (text*) DatumGetPointer( 
+            SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+        );
+        /* the copy must survive SPI_finish(), so make it in TopMemoryContext */
+        oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+        prsname = ptextdup( prsname );
+        MemoryContextSwitchTo(oldcontext);
+        
+        cfg->id=id;
+    } else 
+        ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+    /* second query takes (text parser name, oid cfg id) */
+    arg[0]=TEXTOID;
+    if ( !plan_getmap ) {
+        plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+        if ( !plan_getmap )
+            ts_error(ERROR, "SPI_prepare() failed");
+    }
+
+    pars[0]=PointerGetDatum( prsname );
+    stat = SPI_execp(plan_getmap, pars, " ", 0);
+    if ( stat < 0 )
+        ts_error (ERROR, "SPI_execp return %d", stat);
+    if ( SPI_processed <= 0 )
+        ts_error(ERROR, "No parser with id %d", id);
+
+    for(i=0;i<SPI_processed;i++) {
+        int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+        ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+        ArrayType *a;
+
+        /* first row has the largest token id (order by ... desc): size the map */
+        if ( !cfg->map ) {
+            cfg->len=lexid+1;
+            cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+            if ( !cfg->map )
+                ts_error(ERROR,"No memory");
+            memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+        }
+
+        /* isnull now refers to the dict_name column fetched last */
+        if (isnull)
+            continue;
+
+        a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+        
+        if ( ARR_NDIM(a) != 1 )
+            ts_error(ERROR,"Wrong dimension");
+        if ( ARRNELEMS(a) < 1 )
+            continue;
+
+        cfg->map[lexid].len=ARRNELEMS(a);
+        cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+        if ( !cfg->map[lexid].dict_id )
+            ts_error(ERROR,"No memory");
+        memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+        ptr=(text*)ARR_DATA_PTR(a);
+        /* keep copies of the dictionary names past SPI_finish() */
+        oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+        for(j=0;j<cfg->map[lexid].len;j++) {
+            cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+            ptr=NEXTVAL(ptr);
+        } 
+        MemoryContextSwitchTo(oldcontext);
+
+        if ( a != toasted_a ) 
+            pfree(a);
+    }
+    
+    SPI_finish();
+
+    /* resolve the saved names to ids now that SPI is disconnected */
+    cfg->prs_id = name2id_prs( prsname );
+    pfree(prsname);
+    for(i=0;i<cfg->len;i++) {
+        for(j=0;j<cfg->map[i].len;j++) {
+            ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+            cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+            pfree(ptr);
+        }
+    }
+}
+
+/*
+ * Per-backend cache of loaded configurations (see findcfg / reset_cfg).
+ */
+typedef struct {
+   /* most recently returned entry, checked first by findcfg(); may be NULL */
+   TSCfgInfo   *last_cfg;
+   /* number of entries of list[] in use */
+   int     len;
+   /* number of entries allocated for list[] */
+   int     reallen;
+   /* realloc'ed array, kept sorted with comparecfg() so it can be bsearch'ed */
+   TSCfgInfo   *list;
+   /* cache used by name2id_cfg() to map a configuration name to its oid */
+   SNMap       name2id_map;
+} CFGList;
+
+/* the single cache instance, initially empty */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name -> id map, every cached
+ * configuration's token map with its dictionary-id arrays, and the list
+ * itself.  CList is zeroed afterwards, so the next findcfg() starts fresh.
+ */
+void
+reset_cfg(void) {
+    freeSNMap( &(CList.name2id_map) );
+    if ( CList.list ) {
+        int i,j;
+
+        for(i=0;i<CList.len;i++)
+            if ( CList.list[i].map ) {
+                /* map has CList.list[i].len slots, one per token type */
+                for(j=0;j<CList.list[i].len;j++)
+                    if ( CList.list[i].map[j].dict_id )
+                        free(CList.list[i].map[j].dict_id);
+                free( CList.list[i].map );
+            }
+        free(CList.list);
+    }
+    memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is unsigned,
+ * so a difference squeezed into the int return value can carry the wrong
+ * sign when the two ids are far apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+    Oid ida = ((const TSCfgInfo*)a)->id;
+    Oid idb = ((const TSCfgInfo*)b)->id;
+
+    if ( ida == idb )
+        return 0;
+    return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached description of configuration id, loading it with
+ * init_cfg() on first use.
+ *
+ * Lookup order: the most recently returned entry, then a binary search of
+ * the sorted cache, then a fresh load appended to the cache and sorted in.
+ *
+ * The returned pointer refers into CList.list and can be invalidated by a
+ * later findcfg() call that grows or re-sorts the list.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+    TSCfgInfo key;
+    TSCfgInfo *slot;
+
+    /* last used cfg */
+    if ( CList.last_cfg && CList.last_cfg->id==id )
+        return CList.last_cfg;
+
+    /* already used cfg */
+    key.id=id;
+    if ( CList.len != 0 ) {
+        CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+        if ( CList.last_cfg != NULL )
+            return CList.last_cfg;
+    }
+
+    /* last chance */
+    if ( CList.len==CList.reallen ) {
+        TSCfgInfo *tmp;
+        int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+        tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+        if ( !tmp ) 
+            ts_error(ERROR,"No memory");
+        CList.reallen=reallen;
+        CList.list=tmp;
+    }
+
+    /*
+     * Do not publish the new slot through last_cfg until init_cfg() has
+     * succeeded: init_cfg() stores cfg->id before its later steps can still
+     * raise an error, and a half-initialised slot reachable via last_cfg
+     * would be handed out by the fast path above on the next call.
+     */
+    CList.last_cfg=NULL;
+    slot=&(CList.list[CList.len]);
+    init_cfg(id, slot);
+    CList.len++;
+
+    /* sorting may move the new entry, so locate it again afterwards */
+    qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+    CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+    return CList.last_cfg;
+}
+
+
+/*
+ * Translate a configuration name into its oid.
+ *
+ * The name -> id cache in CList is consulted first; on a miss the oid is
+ * read from pg_ts_cfg through a saved SPI plan and added to the cache.
+ * Raises elog(ERROR) when the query fails, no row matches, or the oid is null.
+ */
+Oid
+name2id_cfg(text *name) {
+    Oid argtypes[1]={ TEXTOID };
+    Datum values[1]={ PointerGetDatum(name) };
+    bool isnull;
+    int rc;
+    Oid id=findSNMap_t( &(CList.name2id_map), name );
+
+    /* cache hit */
+    if ( id ) 
+        return id;
+
+    SPI_connect();
+    if ( !plan_name2id ) {
+        plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+        if ( !plan_name2id ) 
+            elog(ERROR, "SPI_prepare() failed");
+    }
+
+    rc = SPI_execp(plan_name2id, values, " ", 1);
+    if ( rc < 0 )
+        elog (ERROR, "SPI_execp return %d", rc);
+    if ( !( SPI_processed > 0 ) )
+        elog(ERROR, "No tsearch config");
+
+    id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+    if ( isnull ) 
+        elog(ERROR, "Null id for tsearch config");
+
+    SPI_finish();
+    addSNMap_t( &(CList.name2id_map), name, id );
+    return id;
+}
+
+
+/*
+ * Parse buf (buflen bytes) with the parser of cfg and append the normalised
+ * lexemes to prs->words.
+ *
+ * For every token the parser returns, the dictionaries mapped to its type
+ * are tried in order.  The first dictionary that recognises the token (its
+ * lexize function returns a non-NULL array) decides the result: the position
+ * counter prs->pos is advanced and each returned string is stored as a word
+ * at that position.  An empty array therefore consumes a position without
+ * adding a word.  Token types outside cfg->map are skipped.
+ *
+ * The strings returned by the dictionary are kept in prs->words (only the
+ * array holding them is pfree'd).  Raises elog(ERROR) on a token of
+ * MAXSTRLEN bytes or more.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+    int type, lenlemm, i;
+    char    *lemm=NULL;
+    WParserInfo *prsobj = findprs(cfg->prs_id);
+
+    prsobj->prs=(void*)DatumGetPointer(
+        FunctionCall2(
+            &(prsobj->start_info),
+            PointerGetDatum(buf),
+            Int32GetDatum(buflen)
+        )
+    );
+
+    while( ( type=DatumGetInt32(FunctionCall3(
+            &(prsobj->getlexeme_info),
+            PointerGetDatum(prsobj->prs),
+            PointerGetDatum(&lemm),
+            PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+        if ( lenlemm >= MAXSTRLEN )
+            elog(ERROR, "Word is too long");
+
+
+        if ( type >= cfg->len ) /* skip this type of lexem */
+            continue; 
+
+        for(i=0;i<cfg->map[type].len;i++) {
+            DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+            char    **norms, **ptr;
+    
+            /* lenlemm is an integer, so it travels as an int4 datum */
+            norms = ptr = (char**)DatumGetPointer(
+                FunctionCall3(
+                    &(dict->lexize_info),
+                    PointerGetDatum(dict->dictionary),
+                    PointerGetDatum(lemm),
+                    Int32GetDatum(lenlemm)
+                )
+            );
+            if ( !norms ) /* dictionary doesn't know this lexem */
+                continue;
+
+            prs->pos++; /*set pos*/
+
+            while( *ptr ) {
+                if (prs->curwords == prs->lenwords) {
+                    prs->lenwords *= 2;
+                    prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+                }
+
+                prs->words[prs->curwords].len = strlen(*ptr);
+                prs->words[prs->curwords].word = *ptr;
+                prs->words[prs->curwords].alen = 0;
+                prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+                ptr++;
+                prs->curwords++;
+            }
+            pfree(norms);
+            break; /* lexem already normalized or is stop word*/
+        }
+    }
+
+    FunctionCall1(
+        &(prsobj->end_info),
+        PointerGetDatum(prsobj->prs)
+    );
+}
+
+/*
+ * Append one raw token (buflen bytes at buf, of token type `type`) to
+ * prs->words, doubling the array while it is full.  The text is copied into
+ * freshly palloc'ed memory; all other fields of the new entry start zeroed.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+    HLWORD *slot;
+
+    while (prs->curwords >= prs->lenwords) {
+        prs->lenwords *= 2;
+        prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+    }
+
+    slot = &(prs->words[prs->curwords]);
+    memset( slot, 0, sizeof(HLWORD) );
+    slot->type = (uint8)type;
+    slot->len = buflen;
+    slot->word = palloc(buflen);
+    memcpy(slot->word, buf, buflen);
+    prs->curwords++;
+}
+
+/*
+ * Attach to the most recently added word every query operand (item of type
+ * VAL) whose text equals the normalised lexeme buf/buflen.
+ *
+ * The first matching item is stored in the word itself.  Each further match
+ * gets its own copy of the word appended to prs->words, flagged `repeated`,
+ * pointing at that item; the copy shares the original's word pointer.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+    int i;
+    ITEM    *item=GETQUERY(query);
+    HLWORD  *word;
+
+    /* reserve room for the worst case of one extra entry per query item */
+    while (prs->curwords + query->size >= prs->lenwords) {
+        prs->lenwords *= 2;
+        prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+    }
+
+    /*
+     * Take the address only after the array has reached its final place:
+     * repalloc() may move it, which would leave an earlier pointer dangling.
+     */
+    word=&( prs->words[prs->curwords-1] );
+
+    for(i=0; i<query->size; i++) { 
+        if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+            if ( word->item ) {
+                memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+                prs->words[prs->curwords].item=item;
+                prs->words[prs->curwords].repeated=1;
+                prs->curwords++;
+            } else 
+                word->item=item;    
+        }
+        item++;
+    }
+}
+
+/*
+ * Parse buf (buflen bytes) for headline generation.
+ *
+ * Every token returned by the parser of cfg is recorded verbatim in
+ * prs->words (hladdword).  The token is then normalised by the first
+ * dictionary of its type that recognises it, and each resulting lexeme is
+ * matched against the operands of query (hlfinditem).  Lexeme strings and
+ * the array holding them are pfree'd after use.
+ *
+ * Raises elog(ERROR) on a token of MAXSTRLEN bytes or more.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+    int type, lenlemm, i;
+    char    *lemm=NULL;
+    WParserInfo *prsobj = findprs(cfg->prs_id);
+
+    prsobj->prs=(void*)DatumGetPointer(
+        FunctionCall2(
+            &(prsobj->start_info),
+            PointerGetDatum(buf),
+            Int32GetDatum(buflen)
+        )
+    );
+
+    while( ( type=DatumGetInt32(FunctionCall3(
+            &(prsobj->getlexeme_info),
+            PointerGetDatum(prsobj->prs),
+            PointerGetDatum(&lemm),
+            PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+        if ( lenlemm >= MAXSTRLEN )
+            elog(ERROR, "Word is too long");
+
+        /* keep the original token text even when no dictionary handles it */
+        hladdword(prs,lemm,lenlemm,type);
+
+        if ( type >= cfg->len ) 
+            continue; 
+
+        for(i=0;i<cfg->map[type].len;i++) {
+            DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+            char    **norms, **ptr;
+    
+            /* lenlemm is an integer, so it travels as an int4 datum */
+            norms = ptr = (char**)DatumGetPointer(
+                FunctionCall3(
+                    &(dict->lexize_info),
+                    PointerGetDatum(dict->dictionary),
+                    PointerGetDatum(lemm),
+                    Int32GetDatum(lenlemm)
+                )
+            );
+            if ( !norms ) /* dictionary doesn't know this lexem */
+                continue;
+
+            while( *ptr ) {
+                hlfinditem(prs,query,*ptr,strlen(*ptr));
+                pfree(*ptr);
+                ptr++;
+            }
+            pfree(norms);
+            break; /* lexem already normalized or is stop word*/
+        }
+    }
+
+    FunctionCall1(
+        &(prsobj->end_info),
+        PointerGetDatum(prsobj->prs)
+    );
+}
+
+/*
+ * Build the headline text from the words collected in prs.
+ *
+ * A word is emitted only when it is flagged `in` and neither `skip` nor
+ * `repeated`.  A word flagged `replace` is emitted as a single space;
+ * otherwise its text is copied, wrapped in prs->startsel / prs->stopsel when
+ * it is flagged `selected`.
+ *
+ * The word texts are pfree'd along the way (repeated entries are not freed
+ * here).  Returns a palloc'ed text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+    int len=128;
+    text *out=(text*)palloc( len );
+    char *ptr=((char*)out) + VARHDRSZ;
+    int4 i;
+
+    for(i=0; i<prs->curwords; i++) {
+        HLWORD  *wrd=prs->words + i;
+
+        /* grow the buffer until the word plus both markers is sure to fit */
+        while (  wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
+            int dist = ptr - ((char*)out);
+            len*= 2;
+            out = (text *) repalloc(out, len);
+            ptr=((char*)out) + dist;
+        }
+
+        if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+            if ( wrd->replace )
+                *ptr++=' ';
+            else {
+                if (wrd->selected) {
+                    memcpy(ptr,prs->startsel,prs->startsellen);
+                    ptr+=prs->startsellen;
+                }
+                memcpy(ptr,wrd->word,wrd->len);
+                ptr+=wrd->len;
+                if (wrd->selected) {
+                    memcpy(ptr,prs->stopsel,prs->stopsellen);
+                    ptr+=prs->stopsellen;
+                }
+            }
+        }
+
+        if ( !wrd->repeated )
+            pfree(wrd->word);
+    }
+
+    VARATT_SIZEP(out)=ptr - ((char*)out);
+    return out; 
+}
+
+/*
+ * Return the id of the current configuration.
+ *
+ * When none has been chosen yet (current_cfg_id is 0) the configuration
+ * whose pg_ts_cfg.locale equals the current LC_CTYPE locale name is looked
+ * up through SPI and remembered in current_cfg_id.  Raises elog(ERROR) when
+ * the query fails or no configuration matches the locale.
+ */
+int
+get_currcfg(void) {
+    Oid argtypes[1]={ TEXTOID };
+    Datum values[1];
+    const char *curlocale;
+    bool isnull;
+    int rc;
+
+    /* already known */
+    if ( current_cfg_id > 0 )
+        return current_cfg_id;
+
+    SPI_connect();
+    if ( !plan_getcfg_bylocale ) {
+        plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+        if ( !plan_getcfg_bylocale )
+            elog(ERROR, "SPI_prepare() failed");
+    }
+
+    /* passing NULL only queries the locale name, it does not change it */
+    curlocale = setlocale(LC_CTYPE, NULL);
+    values[0] = PointerGetDatum( char2text((char*)curlocale) );
+    rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+    if ( rc < 0 )
+        elog (ERROR, "SPI_execp return %d", rc);
+    if ( !( SPI_processed > 0 ) )
+        elog(ERROR,"Can't find tsearch config by locale");
+    current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+    pfree(DatumGetPointer(values[0]));
+    SPI_finish();
+    return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current one.
+ * findcfg() is called first so the configuration is loaded into the cache
+ * before current_cfg_id is switched.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+    Oid cfgid=PG_GETARG_OID(0);
+
+    findcfg(cfgid);
+    current_cfg_id=cfgid;
+    PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current one.
+ * The name is resolved with name2id_cfg() and the work is delegated to
+ * set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+    text *name=PG_GETARG_TEXT_P(0);
+    Oid cfgid=name2id_cfg(name);
+
+    DirectFunctionCall1( set_curcfg, ObjectIdGetDatum(cfgid) );
+    PG_FREE_IF_COPY(name, 0);
+    PG_RETURN_VOID();
+}
+
+/* SQL-callable: return the id of the current configuration (see get_currcfg). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+    int cfgid=get_currcfg();
+
+    PG_RETURN_OID( cfgid );
+}
+
+/*
+ * SQL-callable: report that the tsearch caches were cleaned.
+ * NOTE(review): this function only calls ts_error(NOTICE, ...); presumably
+ * ts_error() itself performs the cache reset - confirm in its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS)
+{
+    ts_error(NOTICE,"TSearch cache cleaned");
+    PG_RETURN_VOID();
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries assigned to one token type of a configuration. */
+typedef struct {
+   /* number of entries in dict_id */
+   int len;
+   /* dictionary ids stored as Datums, in the order they are tried */
+   Datum   *dict_id;
+} ListDictionary;
+
+/* Cached description of one tsearch configuration (filled by init_cfg). */
+typedef struct {
+   /* oid of the configuration */
+   Oid id;
+   /* id of the parser used by the configuration */
+   Oid prs_id;
+   /* number of entries in map */
+   int len;
+   /* per-token-type dictionary lists, indexed by token type id */
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalised lexeme produced by parsetext_v2(). */
+typedef struct {
+        /* length of word in bytes */
+        uint16          len;
+   /* parsetext_v2() stores a single position in pos.pos */
+   /* NOTE(review): apos/alen look like a position array and its size, filled elsewhere - confirm */
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        /* lexeme text as returned by the dictionary */
+        char       *word;
+   /* set to 0 by parsetext_v2() */
+   uint32  alen;
+}       WORD;
+   
+/* Growing array of lexemes collected while parsing a document. */
+typedef struct {
+        /* lexeme array, doubled with repalloc when full */
+        WORD       *words;
+        /* allocated number of entries in words */
+        int4            lenwords;
+        /* number of entries of words in use */
+        int4            curwords;
+   /* running position counter, incremented once per recognised token */
+   int4        pos;
+}       PRSTEXT;
+
+/* One token of the document being turned into a headline. */
+typedef struct {
+        /* length of word in bytes */
+        uint16    len;
+   /*
+    * Flags read by genhl():
+    *   selected - wrap the word in the start/stop selection markers
+    *   in       - word belongs to the headline
+    *   skip     - do not output the word
+    *   replace  - output a single space instead of the word
+    *   repeated - extra copy made by hlfinditem() for a further matching
+    *              query item; shares word with the original entry
+    */
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   /* token type as returned by the parser */
+   uint8   type;
+        /* palloc'ed copy of the token text (not NUL-terminated) */
+        char      *word;
+   /* matching query operand, or NULL when the token matches none */
+   ITEM      *item;
+}       HLWORD;
+   
+/* Growing array of headline tokens plus the selection markers. */
+typedef struct {
+        /* token array, doubled with repalloc when full */
+        HLWORD       *words;
+        /* allocated number of entries in words */
+        int4            lenwords;
+        /* number of entries of words in use */
+        int4            curwords;
+        /* text emitted before a selected word, startsellen bytes long */
+        char           *startsel;
+        /* text emitted after a selected word, stopsellen bytes long */
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+
+/*
+ * Input function of the tsstat type.  The argument is not looked at: the
+ * result is always an empty accumulator consisting of the header only
+ * (len = STATHDRSIZE, size = 0).
+ */
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+/*
+ * Output function of the tsstat type.  There is no text representation:
+ * the call always raises ERROR "Unimplemented".  The PG_RETURN_NULL() after
+ * the elog() is only there to give the function a return statement.
+ */
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * Allocate or enlarge an array of WordEntry pointers.
+ *
+ * in  - current array, or NULL if none has been allocated yet
+ * len - in/out: capacity of the array in elements
+ *
+ * A missing array (in == NULL or *len == 0) is created with room for 8
+ * pointers; an existing one is doubled.  *len is updated to the new
+ * capacity and the (possibly moved) array is returned.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       /* already allocated: double the capacity */
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*) * (*len) );
+   }
+
+   /* first use */
+   *len = 8;
+   return palloc( sizeof(WordEntry*) * (*len) );
+}
+
+/*
+ * Order a statistics entry against a tsvector word.
+ *
+ * a    - entry of stat; its text lives at STATSTRPTR(stat) + a->pos
+ * b    - entry of txt;  its text lives at STRPTR(txt) + b->pos
+ *
+ * Words are ordered by length first; only words of equal length are
+ * compared byte-wise with strncmp().  Returns <0, 0 or >0.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   char    *aword, *bword;
+
+   /* different lengths decide the order on their own */
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   aword = STATSTRPTR(stat) + a->pos;
+   bword = STRPTR(txt) + b->pos;
+   return strncmp( aword, bword, a->len );
+}
+
+/*
+ * Build a new tsstat holding every entry of stat plus len new words.
+ *
+ * stat  - existing statistics; not modified and not freed here
+ * txt   - tsvector the new words belong to
+ * entry - array of len pointers to WordEntry items of txt that are absent
+ *         from stat.  The merge below consumes them front to back, so they
+ *         are expected in ascending compareStatWord() order.
+ * len   - number of elements in entry
+ *
+ * Layout of the result: header, nentry StatEntry records, then the string
+ * area.  The old string area is copied verbatim to the front of the new
+ * one (so pos offsets of old entries stay valid) and the new words are
+ * appended after it.  Each new entry starts with ndoc = 1 and
+ * nentry = POSDATALEN(txt, word), or 1 when the word has no positions.
+ *
+ * Returns the freshly palloc'd tsstat.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* bytes of string data contributed by the new words */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings first; new words are appended starting at curptr */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary-search its slot, copy both halves around it */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr;
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1;
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: classic two-way merge of old and new entries */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1;
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries (possibly nothing) */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) );
+
+       /*
+        * Whatever is left of the new words.  If any remain, the old entries
+        * were exhausted by the merge loop, so the memcpy above copied zero
+        * records and nptr is already the right slot.
+        */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1;
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+
+/*
+ * Transition function: fold one tsvector into a tsstat accumulator.
+ *
+ * arg 0 - tsstat accumulated so far; NULL means "start with an empty one"
+ * arg 1 - tsvector to add; NULL or empty leaves the accumulator unchanged
+ *
+ * For every word of the tsvector already present in the accumulator, ndoc
+ * is incremented and nentry grows by the word's position count (1 if it has
+ * none); these updates are made in place.  Words not yet present are
+ * collected and handed to formstat(), which builds a new, larger tsstat.
+ *
+ * Returns the accumulator itself when no new word was seen, otherwise the
+ * new tsstat.  The old accumulator is not freed here.
+ */
+Datum
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt;
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /*
+    * A NULL tsvector must be detected before detoasting: detoasting a null
+    * Datum would dereference a null pointer.
+    */
+   if ( PG_ARGISNULL(1) )
+       PG_RETURN_POINTER(stat);
+
+   txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+
+   /* nothing to add from an empty tsvector */
+   if ( txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both sorted lists in parallel when the
+    * accumulator is less than 100 times larger than the tsvector,
+    * otherwise binary-search each word in the accumulator.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word sorts before the current entry: it is new */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* words sorting after the last existing entry are all new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr;
+           }
+
+           /* the loop ends with StopLow < StopHigh only via the break above */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }
+   }
+
+
+   if ( cur==0 ) { /* no new words */
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions in this file: a private
+ * copy of the statistics and the index of the next entry to return.
+ */
+typedef struct {
+   uint32  cur;    /* index of the next StatEntry to emit */
+   tsstat *stat;   /* copy of the statistics being returned */
+} StatStorage;
+
+/*
+ * First-call initialisation shared by the set-returning functions of this
+ * file.
+ *
+ * funcctx - context obtained from SRF_FIRSTCALL_INIT()
+ * stat    - statistics to be returned row by row; copied, not retained
+ *
+ * Everything is allocated in funcctx->multi_call_memory_ctx: a StatStorage
+ * with a private copy of stat and cur = 0 (stored in user_fctx), plus the
+ * tuple slot and attribute metadata for the "statinfo" row type.  The
+ * caller's memory context is restored before returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext   saved;
+   StatStorage     *storage;
+   TupleDesc       rowdesc;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* cursor plus private copy of the statistics */
+   storage = palloc( sizeof(StatStorage) );
+   storage->cur = 0;
+   storage->stat = palloc( stat->len );
+   memcpy(storage->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)storage;
+
+   /* description of the rows handed back to the executor */
+   rowdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+
+/*
+ * Produce the next result row of a statistics scan.
+ *
+ * funcctx - context whose user_fctx holds the StatStorage prepared by
+ *           ts_setup_firstcall()
+ *
+ * While entries remain, builds a (word, ndoc, nentry) tuple for the current
+ * entry from C strings, advances the cursor and returns the tuple Datum.
+ * Once all entries have been returned, frees the StatStorage and its
+ * statistics copy and returns (Datum)0 to signal the end of the set.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *storage = (StatStorage*)funcctx->user_fctx;
+   StatEntry   *item;
+   HeapTuple   row;
+   Datum       out;
+   char        *columns[3];
+   char        ndocbuf[16];
+   char        nentrybuf[16];
+
+   /* exhausted: release our state and report "done" */
+   if ( !( storage->cur < storage->stat->size ) ) {
+       pfree(storage->stat);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   item = STATPTR(storage->stat) + storage->cur;
+
+   /* column 0: the word, copied out because it is not NUL-terminated */
+   columns[0] = palloc( item->len+1 );
+   memcpy( columns[0], STATSTRPTR(storage->stat)+item->pos, item->len);
+   (columns[0])[item->len] = '\0';
+
+   /* columns 1 and 2: the two counters in decimal */
+   sprintf(ndocbuf,"%d",item->ndoc);
+   columns[1] = ndocbuf;
+   sprintf(nentrybuf,"%d",item->nentry);
+   columns[2] = nentrybuf;
+
+   row = BuildTupleFromCStrings(funcctx->attinmeta, columns);
+   out = TupleGetDatum(funcctx->slot, row);
+
+   pfree(columns[0]);
+   storage->cur++;
+   return out;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: expands a tsstat (argument 0) into one row per
+ * accumulated word.
+ *
+ * On the first call the tsstat is copied into multi-call memory by
+ * ts_setup_firstcall(); every call then returns the next row produced by
+ * ts_process_call(), and the set ends when that returns (Datum)0.
+ */
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* (Datum)0 from ts_process_call() means "no more rows" */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Oid of the tsvector type; looked up once by get_ti_Oid() and then reused */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the Oid of the tsvector type in pg_type through SPI and store it
+ * in tiOid.  Must be called inside an SPI connection.
+ *
+ * Raises ERROR when the query fails, when no row comes back, or when the
+ * Oid is NULL or InvalidOid.
+ */
+static void
+get_ti_Oid(void) {
+   int ret;
+   bool isnull;
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is an unsigned row count, so "no rows" has to be tested
+    * as < 1; otherwise vals[0] below would be read from an empty result.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull || tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL query given in txt and accumulate word statistics over its
+ * result.  Must be called inside an SPI connection.
+ *
+ * txt - text of a query that returns exactly one column of type tsvector
+ *
+ * The query is opened as a cursor and read 100 rows at a time; every
+ * non-NULL value is folded into the running tsstat with ts_accum().
+ * Raises ERROR if the query cannot be prepared or opened, returns a number
+ * of columns other than one, or returns a column that is not tsvector.
+ *
+ * Returns the accumulated tsstat (header only if no rows were seen).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid )
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from an empty accumulator */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum() never frees its input; drop it once replaced */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       }
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);
+   }
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: runs the query passed as text in argument 0 and
+ * returns one row per distinct word found in its tsvector column.
+ *
+ * All the work happens on the first call: the query is executed under SPI
+ * by ts_stat_sql(), and ts_setup_firstcall() copies the resulting tsstat
+ * into multi-call memory before SPI_finish() is called.  Later calls only
+ * emit rows through ts_process_call() until it returns (Datum)0.
+ */
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* (Datum)0 from ts_process_call() means "no more rows" */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one distinct word stored in a tsstat.
+ */
+typedef struct {
+   /* length of the word in bytes (the word is not NUL-terminated) */
+   uint32  len;
+   /* offset of the word from the start of the tsstat string area */
+   uint32  pos;
+   /* number of tsvectors in which the word was seen */
+   uint32  ndoc;   
+   /* total occurrences; a word without position data counts as one */
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Accumulated word statistics.  The two header fields are followed by
+ * `size` StatEntry records and then by the string area holding the words.
+ */
+typedef struct {
+   /* total size of the structure in bytes, header included */
+   int4        len;
+   /* number of StatEntry records */
+   int4        size;
+   /* start of the variable-length part (entries, then strings) */
+   char        data[1];
+}  tsstat;
+
+/*
+ * Accessors for the tsstat layout: header, StatEntry array, string area.
+ * Every macro argument is parenthesized so that expressions can be passed
+ * safely, and the header fields are read through tsstat itself, so this
+ * file does not depend on tsvector.h being included first.
+ */
+/* size of the fixed header (len + size) */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* total bytes needed for x entries and lenstr bytes of word data */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* first StatEntry of the tsstat x */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* start of the string area of the tsstat x (just past the last entry) */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* number of bytes in the string area of the tsstat x */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+-- One row per dictionary known to tsearch2.
+CREATE TABLE pg_ts_dict (
+-- name used to refer to the dictionary (e.g. in pg_ts_cfgmap.dict_name)
+   dict_name   text not null primary key,
+-- pg_proc oid of the dictionary's init function (may be null)
+   dict_init   oid,
+-- text passed to the init function, e.g. the path of a stop-word file
+   dict_initoption text,
+-- pg_proc oid of the dictionary's lexize function
+   dict_lexize oid not null,
+-- free-form description
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+-- One row per text parser known to tsearch2.  The five oid columns hold
+-- pg_proc oids of the functions implementing the parser.
+CREATE TABLE pg_ts_parser (
+-- name used to refer to the parser (e.g. in pg_ts_cfg.prs_name)
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+-- free-form description
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+-- One row per tsearch configuration.
+CREATE TABLE pg_ts_cfg (
+-- name of the configuration
+   ts_name     text not null primary key,
+-- name of the parser to use (see pg_ts_parser.prs_name)
+   prs_name    text not null,
+-- locale name associated with the configuration, e.g. 'C'; may be null
+   locale      text
+) with oids;
+
+-- For each configuration and token type: the dictionaries to apply.
+CREATE TABLE pg_ts_cfgmap (
+-- configuration name (see pg_ts_cfg.ts_name)
+   ts_name     text not null,
+-- token type alias, e.g. 'lword', 'email', 'url'
+   tok_alias   text not null,
+-- array of dictionary names (see pg_ts_dict.dict_name)
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance ranking
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of position lexeme in string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>            /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator for WordEntryPos elements: ascending by position.
+ *
+ * Equal positions compare as 0.  A comparator that reports "greater" for
+ * equal keys (in both argument orders) violates the qsort() contract and
+ * leaves the resulting order unspecified; uniquePos() needs equal positions
+ * to end up adjacent so that it can merge them.
+ */
+static int
+comparePos(const void *a, const void *b) {
+   int pa = ((const WordEntryPos *) a)->pos;
+   int pb = ((const WordEntryPos *) b)->pos;
+
+   if ( pa == pb )
+       return 0;
+   return ( pa > pb ) ? 1 : -1;
+}
+
+/*
+ * Sort an array of l positions and remove duplicate positions in place.
+ *
+ * When the same position occurs more than once, the largest weight seen
+ * for it is kept.  Compaction stops early once MAXNUMPOS entries have been
+ * produced or the position MAXENTRYPOS-1 has been stored.
+ *
+ * Returns the number of entries left at the start of a.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *ptr, *res;
+
+   res=a;
+   /* a single element is already sorted and unique */
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   /* res is the last kept entry, ptr scans the remaining sorted input */
+   ptr = a + 1;
+   while (ptr - a < l) {
+       if ( ptr->pos != res->pos ) {
+           res++;
+           res->pos = ptr->pos;
+           res->weight = ptr->weight;
+           /* stop when the array is full or the maximal position is reached */
+           if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
+               break;
+       } else if ( ptr->weight > res->weight )
+           res->weight = ptr->weight;
+       ptr++;
+   }
+   return res + 1 - a;
+}
+
+/* Buffer that entry.pos offsets refer to; set by uniqueentry() before qsort() */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN: shorter lexemes sort first, lexemes
+ * of equal length are ordered by strncmp() over their bytes in BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *lhs = (const WordEntryIN *) a;
+   const WordEntryIN *rhs = (const WordEntryIN *) b;
+
+   if ( lhs->entry.len != rhs->entry.len )
+       return ( lhs->entry.len > rhs->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[lhs->entry.pos],
+                  &BufferStr[rhs->entry.pos],
+                  lhs->entry.len);
+}
+
+/*
+ * Sort the l parsed entries of a and merge duplicate lexemes in place.
+ *
+ * buf is the string buffer the entry.pos offsets point into.  For entries
+ * carrying positions, pos[0] (read as uint16) holds the number of positions
+ * and pos[1..] the positions themselves; the lists of duplicate lexemes are
+ * concatenated and then cleaned up with uniquePos().
+ *
+ * *outbuflen is updated with the space needed for the output data area:
+ * SHORTALIGN(len) per lexeme plus, for every lexeme with positions, the
+ * position list INCLUDING its leading count slot.  The count slot must be
+ * included because the caller copies (count + 1) slots per list; leaving it
+ * out under-sizes the output buffer when many short lexemes have positions.
+ * As before, the single-entry path assigns *outbuflen (and leaves it alone
+ * when that entry has no positions), while the multi-entry path adds to the
+ * value passed in.
+ *
+ * Returns the number of unique entries left at the start of a.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   res = a;
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
+       }
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* new lexeme: finish the previous one and account for its size */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               /* +1: the count slot is stored in front of the positions */
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* duplicate lexeme with positions: merge them into res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* res had no positions yet: take over the duplicate's list */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* finish and account for the last kept lexeme */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/* States of the gettoken_tsvector() parser */
+/* skipping blanks, looking for the first character of a lexeme */
+#define WAITWORD   1
+/* inside an unquoted lexeme */
+#define WAITENDWORD 2
+/* a backslash was seen: the next character is taken literally */
+#define WAITNEXTCHAR   3
+/* inside a quoted lexeme, waiting for the closing quote */
+#define WAITENDCMPLX   4
+/* just after a quoted lexeme: a ':' starts position info */
+#define WAITPOSINFO    5
+/* a position number must start here */
+#define INPOSINFO  6
+/* after a position number: ',', a weight letter or the end is expected */
+#define WAITPOSDELIM   7
+
+/*
+ * Double the lexeme buffer state->word when there is no room left for one
+ * more character plus a terminator; curpos is re-based onto the new buffer.
+ * Expects a variable named "state" (TI_IN_STATE *) in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Read the next lexeme (and, unless state->oprisdelim is set, its optional
+ * position list) from state->prsbuf.
+ *
+ * The lexeme is stored NUL-terminated in state->word, with state->curpos
+ * pointing at the terminator.  If positions were given, state->alen is
+ * non-zero and state->pos holds them: slot 0, read as uint16, is the number
+ * of positions, slots 1.. are the positions with their weights.
+ *
+ * Returns 1 when a lexeme was read and 0 at the end of the input; syntax
+ * errors are reported with elog(ERROR).
+ *
+ * The <ctype.h> classifiers are given (unsigned char) values: passing a
+ * plain char that is negative (any byte >= 0x80 where char is signed) is
+ * undefined behaviour.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               /* escaped character: copy verbatim, return to previous state */
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               /* end of an unquoted lexeme without position info */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1;
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               /* closing quote */
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               /* make room for one more position (slot 0 is the counter) */
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2;
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               /* remaining digits of the current number are skipped here */
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type: parse the C string argument with
+ * gettoken_tsvector(), sort and de-duplicate the lexemes with uniqueentry()
+ * and build the tsvector value (entry array followed by the data area that
+ * holds each lexeme and, if present, its position list).
+ *
+ * WordEntry.pos is a 20-bit field, so every offset stored in it must be
+ * strictly below MAXSTRPOS.  This is enforced both for the temporary
+ * offsets into tmpbuf and for the final offsets into the value's data area
+ * (the latter also contain alignment padding and position lists).
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array and the string buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /* offset MAXSTRPOS itself would wrap the 20-bit pos field */
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /* the final offset has to fit into the 20-bit pos field as well */
+       if (cur - STRPTR(in) >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* copy the count slot together with the positions */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos );
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * Return the number of entries (the size field) of a tsvector.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the counter before the detoasted copy may be released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type.
+ *
+ * Every lexeme is printed in single quotes; entries are separated by one
+ * space.  A position list is appended as ':' followed by comma-separated
+ * numbers, each optionally followed by its weight letter (A, B or C;
+ * weight 0 prints nothing).
+ *
+ * Both the quote and the backslash are escaped with a backslash, because
+ * gettoken_tsvector() treats a backslash in its input as an escape
+ * character; an unescaped backslash in the output would not read back as
+ * the same lexeme.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   /* estimate the size of the output buffer */
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += ptr[i].len*2 /*for escape */;
+       if ( ptr[i].haspos )
+           lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'' || *curin == '\\')
+           {
+               /* one more byte for the escape character */
+               int4        pos = curout - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0:
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD: shorter words first, words of equal length
+ * by strncmp(), identical words by ascending position.
+ *
+ * Fully equal elements compare as 0; reporting "less" for them in both
+ * argument orders would violate the qsort() comparator contract.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+
+   if (wa->len == wb->len) {
+       int res = strncmp(wa->word, wb->word, wb->len);
+
+       if ( res==0 ) {
+           if ( wa->pos.pos == wb->pos.pos )
+               return 0;
+           return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+       }
+       return res;
+   }
+   return (wa->len > wb->len) ? 1 : -1;
+}
+
+/*
+ * Sort the l words of a and merge duplicates in place.
+ *
+ * On input each WORD carries a single position in pos.pos.  On output each
+ * kept WORD owns a palloc'd array pos.apos in which apos[0] is the number
+ * of positions and apos[1..] are the positions (clamped with LIMITPOS);
+ * alen is the allocated length of that array.  The word strings of merged
+ * duplicates are pfree'd.
+ *
+ * Returns the number of unique words left at the start of a.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   if (l == 1) {
+       /* pos.pos is read into tmppos before pos.apos is assigned */
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   /* convert the first word to the array form */
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* a new word: move it to the next output slot */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* a duplicate: drop its string and append its position */
+           pfree(ptr->word);
+           /* nothing is appended once the list is full or ends with the maximal position */
+           if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
+               res->pos.apos[0]++; 
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * Builds a tsvector from the words collected in prs: the words are made
+ * unique with uniqueWORD(), then each word and its position list (uint16
+ * count followed by WordEntryPos items with weight 0) is copied into the
+ * data area.  The word strings, the position arrays and prs->words itself
+ * are pfree'd.
+ *
+ * WordEntry.pos is a 20-bit field, so an offset equal to MAXSTRPOS would
+ * already wrap around; offsets must be strictly below MAXSTRPOS.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+   /* compute the size of the data area */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+
+           ptr->haspos=1;
+           /* the count goes first, the positions follow it */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg id, text): parse the text with the configuration looked
+ * up by findcfg() and return the resulting tsvector; a text that yields no
+ * words gives a tsvector with zero entries.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *txt = PG_GETARG_TEXT_P(1);
+   TSCfgInfo *cfg = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     parsed;
+   tsvector       *result;
+
+   parsed.lenwords = 32;
+   parsed.curwords = 0;
+   parsed.pos = 0;
+   parsed.words = (WORD *) palloc(sizeof(WORD) * parsed.lenwords);
+
+   parsetext_v2(cfg, &parsed, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+   PG_FREE_IF_COPY(txt, 1);
+
+   if (parsed.curwords == 0) {
+       /* nothing was found: build a header-only value */
+       pfree(parsed.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   } else
+       result = makevalue(&parsed);
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg name, text): translate the configuration name with
+ * name2id_cfg() and hand over to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * to_tsvector(text): call to_tsvector() with the configuration id returned
+ * by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * Look up a function by its (unqualified) name among the candidates with
+ * one argument and return the oid of the first candidate whose argument
+ * type is text, or InvalidOid if there is none.  The candidate list is
+ * pfree'd item by item.
+ */
+static Oid
+findFunc(char *fname) {
+   List *names = makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(names, 1);
+   Oid found = InvalidOid;
+
+   freeList(names);
+
+   while ( cand != NULL ) {
+       FuncCandidateList next = cand->next;
+
+       /* only the first matching candidate is remembered */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+
+       pfree(cand);
+       cand = next;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * Row-level BEFORE INSERT/UPDATE trigger that fills a tsvector column from
+ * other columns of the same row.  Trigger arguments: the name of the
+ * tsvector column first, then one or more names, each of which is either a
+ * column of the table or a function (looked up with findFunc()).  Columns
+ * must be of type text, varchar or char; others are skipped with a
+ * WARNING.  Once a function name has been seen, the values of the columns
+ * that follow it are passed through that function before being parsed
+ * (funcoid is not reset inside the loop).
+ *
+ * Returns the modified tuple; problems are reported with elog(ERROR).
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   /* check how the function was called */
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is going to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* the first argument names the column that receives the tsvector */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: the argument must name a function then */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* pass the value through the user-supplied function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /* no detoasted copy was made: make the pfree test below skip txt */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only if it is a copy made by detoasting */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a tsvector with zero entries */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Descriptor of one lexeme, packed into 32 bits: a flag telling whether a
+ * position list follows the lexeme, the lexeme length and its offset in
+ * the data area (see STRPTR below).
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* limits that follow from the bit-field widths above */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/* One position of a lexeme together with its weight, packed into 16 bits */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+/* clamp a position to the largest value that fits into WordEntryPos.pos */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * The tsvector value: len is the total size in bytes, size the number of
+ * WordEntry items; data holds the WordEntry array followed by the data
+ * area with the lexemes and their position lists.
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/* size of the two int4 header fields */
+#define DATAHDRSIZE (sizeof(int4)*2)
+/* total size of a tsvector with x entries and lenstr bytes of data area */
+#define CALCDATASIZE(x, lenstr) ( x * sizeof(WordEntry) + DATAHDRSIZE + lenstr )
+/* start of the WordEntry array */
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)x + DATAHDRSIZE ) )
+/* start of the data area; depends on the current value of size */
+#define STRPTR(x)  ( (char*)x + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)x)->size ) )
+/* size of the data area in bytes */
+#define STRSIZE(x) ( ((tsvector*)x)->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)x)->size ) )
+/* address just behind the (short-aligned) lexeme of entry e: the uint16 position count */
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+/* number of positions of entry e, 0 if it has no position list */
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+/* first WordEntryPos of entry e (just behind the count) */
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/* Entry as collected while parsing input: descriptor plus separately allocated position list */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/* State of the gettoken_tsvector() parser */
+typedef struct
+{
+   /* current read position in the input string */
+   char       *prsbuf;
+   /* buffer for the current lexeme, its write position and allocated size */
+   char       *word;
+   char       *curpos;
+   int4        len;
+   /* current parser state */
+   int4        state;
+   /* allocated length of pos (0 if the lexeme has no positions) and the positions */
+   int4        alen;
+   WordEntryPos    *pos;
+   /* if true, operator characters end a lexeme and position info is not parsed */
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>            /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the argument without any position
+ * information; only the lexemes themselves are kept.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* size of the data area: the short-aligned lexemes only */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, char): return a copy of the tsvector in which every
+ * position carries the weight named by the letter ('a'..'d', in either
+ * case, map to 3..0); any other letter is an error.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *result;
+   WordEntry *ent;
+   int remaining, npos, k;
+   int weight=0;
+
+   switch(tolower(cw)) {
+       case 'a': weight=3; break;
+       case 'b': weight=2; break;
+       case 'c': weight=1; break;
+       case 'd': weight=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   /* work on a private copy of the whole value */
+   result=(tsvector*)palloc(in->len);
+   memcpy(result,in,in->len);
+
+   ent=ARRPTR(result);
+   for(remaining=result->size; remaining--; ent++) {
+       WordEntryPos *p;
+
+       npos=POSDATALEN(result,ent);
+       if ( npos == 0 )
+           continue;
+
+       p=POSDATAPTR(result,ent);
+       for(k=0; k<npos; k++)
+           p[k].weight=weight;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * Compare two lexemes that live in (possibly different) data areas ptra
+ * and ptrb: the shorter one sorts first, equal lengths are decided by
+ * strncmp() over the lexeme bytes.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of entry srcptr (in src) to the position list of
+ * entry destptr (in dest), shifting every position by maxpos and clamping
+ * it with LIMITPOS.  If destptr has no position list yet, its counter is
+ * initialised to zero first.  destptr->haspos is set when at least one
+ * position was added.
+ *
+ * Returns the number of positions added.
+ *
+ * NOTE(review): the loop header was truncated in this copy of the file
+ * ("for(i=0; i").  The bound i<slen is required by the spos[i] accesses;
+ * the MAXNUMPOS / MAXENTRYPOS-1 stop conditions are reconstructed to match
+ * the limits applied by uniquePos() and uniqueWORD() in tsvector.c -
+ * confirm against the original source.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos )
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight;
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1;
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector): merge two tsvectors into one.
+ *
+ * The entries of both inputs are merged in the order defined by
+ * compareEntry().  Positions coming from the second argument are shifted
+ * by the largest position found in the first one (via add_pos()); a lexeme
+ * present in both inputs gets the positions of the first argument followed
+ * by the shifted positions of the second.
+ *
+ * The result is allocated with the summed size of both inputs and size is
+ * first set to the sum of both entry counts; after the merge, size and len
+ * are reduced to what was actually used and the data area is moved to its
+ * final place.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   /* data is the start of the data area for the preliminary (maximal) size */
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   /* merge while both inputs still have entries */
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* positions of in1 are copied unchanged, count included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* positions of in2 are shifted by maxpos */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* the same lexeme in both inputs: one entry with both position lists */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count slot is already there, only the added positions take space */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* copy what is left of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* copy what is left of in2, shifting its positions */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /* shrink to the real number of entries; STRPTR(out) changes with size */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+--Be careful!!!
+--This script drops all indexes, triggers and columns that use the types
+--defined in tsearch2.sql.
+
+
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type.  The array is indexed by
+ * the token type code from deflex.h (LATWORD = 1 ... HTMLENTITY = 23);
+ * slot 0 is an empty placeholder because no token type uses code 0.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token type code from
+ * deflex.h in the same way as lex_descr[]; slot 0 is an empty placeholder.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/* remember: LASTNUM must equal the highest token type code below !!!! */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+#include <stdio.h>     /* FILE, needed by start_parse_fh() */
+
+/*
+ * Interface of the flex scanner built from parser.l.
+ *
+ * token / tokenlen describe the token most recently returned by
+ * tsearch2_yylex().  "token" is defined in parser.l, so it is only
+ * declared here; defining it in the header would create one definition
+ * per including source file.
+ * NOTE(review): "tokenlen" has no other definition in parser.l, so the
+ * line below is still its (tentative) definition.  Move the definition
+ * into parser.l and declare it extern here to build with -fno-common.
+ */
+extern char *token;
+int            tokenlen;
+
+/* Returns the type code of the next token (see deflex.h), 0 at end of input. */
+int            tsearch2_yylex(void);
+/* Start scanning the first "limit" bytes of a string. */
+void       start_parse_str(char *, int);
+/* Start scanning a file handle; a limit of 0 means "no limit". */
+void       start_parse_fh(FILE *, int);
+/* Release the scanner buffer and any saved composite token. */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: every three-argument
+ * fprintf(file, fmt, msg) in the generated scanner is turned into
+ * ts_error(ERROR, fmt, msg); the "file" argument is dropped.
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* route the scanner's memory management through the postgres allocator */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+/* strdup may already be a macro; replace it with the postgres version */
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the text of the current token */
+char *s     = NULL;  /* private copy used to return a WHOLE composite token (hyphenated word or URL) */
+
+YY_BUFFER_STATE buf = NULL; /* buffer being parsed; released by end_parse() */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next read from the filehandle */
+
+/*
+ * Redefine YY_INPUT so that reads from tsearch2_yyin can be length-limited.
+ *  - interactive buffers: read character by character, stopping after a
+ *    newline, at EOF, or once max_size characters were stored;
+ *  - otherwise: lrlimit == 0 reports end of input (YY_NULL); lrlimit > 0
+ *    caps the read at lrlimit bytes and is decremented on every call; a
+ *    negative lrlimit asks for max_size bytes.
+ * Note that the fread() is NOT part of the inner "else": it runs for both
+ * the limited and the unlimited case, despite its indentation.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
+   /* closing script tag: back to normal scanning, report the span as SPACE */
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+"<!--" { BEGIN INCOMMENT; }
+
+<INCOMMENT>"-->"   { 
+   /* end of an HTML comment: back to normal scanning, report SPACE */
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"</"[[:alpha:]]    { BEGIN INTAG; }
+
+<INTAG>"\""    { BEGIN QINTAG; }
+
+<QINTAG>"\\\"" ;
+
+<QINTAG>"\""   { BEGIN INTAG; }
+
+<INTAG>">" { 
+   /* end of a tag: the whole tag is reported as one blank of type TAG */
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   /* named HTML entity */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   /* numeric HTML entity with one to three digits */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   /* three or more dot-separated numbers */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+<DELIM,INITIAL>[0-9]+ { 
+   /*
+    * Also active in DELIM, so that a purely numeric part of a hyphenated
+    * word is reported as an integer rather than as a word part.
+    * NOTE(review): the state list was lost in this copy of the file and
+    * has been reconstructed - confirm against the upstream parser.l.
+    */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   /* protocol prefix: what follows is scanned in the URL state */
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+<URL,INITIAL>{HOSTNAME}[/:]{URI} { 
+   /*
+    * Return the WHOLE url from a private copy, then push the text back
+    * (yyless(0)) so that the SERVER state returns its host and uri parts.
+    * NOTE(review): the state lists of this and the next two rules were
+    * lost in this copy of the file and have been reconstructed - confirm
+    * against the upstream parser.l.
+    */
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+<SERVER,URL,INITIAL>{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+<SERVER>[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   /*
+    * Return the WHOLE hyphenated word from a private copy (s), then push
+    * the matched text back with yyless(0) and switch to DELIM so that the
+    * following calls rescan the same text.
+    */
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+   /* same scheme as above for a hyphenated word matching [[:alpha:]] */
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   /* same scheme as above for a mixed alphanumeric hyphenated word */
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+<DELIM>[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   /* DELIM state: parts of a composite word that was just pushed back */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+<DELIM>\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+<DELIM>{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+<DELIM>[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+<DELIM>{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+<DELIM>-  { 
+   /* the hyphen between two parts is reported as SPACE */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+<DELIM,SERVER,URL>.|\n /* return in basic state */ {
+   /*
+    * Anything else ends the composite token: go back to INITIAL and
+    * rescan the character there.
+    * NOTE(review): the state prefixes of the rules in this group were lost
+    * in this copy of the file and have been reconstructed - confirm
+    * against the upstream parser.l.
+    */
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   /* word made only of characters in the \200-\377 range */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   /* any other run of letters and digits */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   /* a run of whitespace is a single SPACE token */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   /* any character not matched above is reported as SPACE as well */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: drop the saved copy of the last composite token
+ * (if any) and delete the current scanner buffer.
+ */
+void end_parse() {
+   if (s != NULL) {
+       free(s);
+       s = NULL;
+   }
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+} 
+
+/*
+ * Begin scanning the first "limit" bytes of "str".  A buffer left over
+ * from a previous parse is released first; the scanner starts in INITIAL.
+ */
+void start_parse_str(char* str, int limit) {
+   if (buf != NULL)
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Begin scanning from the file handle "fh".  "limit" is the read limit
+ * handed to YY_INPUT through lrlimit; a limit of 0 selects -1 (unlimited).
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if (buf != NULL)
+       end_parse();
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser whose pg_ts_parser row has
+ * oid "id".
+ *
+ * *prs is zeroed first.  The row is fetched through SPI with a plan that
+ * is prepared once and kept in plan_getparser.  The start, nexttoken, end
+ * and headline functions are resolved into FmgrInfo structures allocated
+ * in TopMemoryContext; the lextype function is only stored by oid.
+ * Reports an error through ts_error(ERROR, ...) when the plan cannot be
+ * prepared, the query fails, or no such parser exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid;
+
+       /* result columns 1..5 follow the order of the select list above */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       ts_error(ERROR, "No parser with id %u", id);    /* Oid is unsigned */
+   SPI_finish();
+}
+
+/*
+ * Per-backend cache of parser descriptions that were already looked up.
+ */
+typedef struct {
+   WParserInfo *last_prs;  /* entry returned by the last lookup, or NULL */
+   int     len;            /* number of used entries in list */
+   int     reallen;        /* number of allocated entries in list */
+   WParserInfo *list;      /* entries, kept sorted by prs_id for bsearch() */
+   SNMap       name2id_map;    /* parser name -> oid cache */
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Throw away the whole parser cache: the name -> oid map, the array of
+ * parser descriptions, and every counter and pointer in PList.
+ */
+void    
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+   if ( PList.list != NULL )
+       free(PList.list);
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ * The ids are compared explicitly: Oid is unsigned, so returning the
+ * difference of two ids gives the wrong sign once they are more than
+ * INT_MAX apart.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   return ( ida > idb ) ? 1 : 0;
+}
+
+/*
+ * Return the cached description of parser "id", loading it with
+ * init_prs() on first use.
+ *
+ * Lookup order: the entry returned by the previous call, then a binary
+ * search in the sorted cache, and finally a fresh entry appended to the
+ * cache (which is grown by doubling, starting at 16 entries).
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo *slot;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       WParserInfo key;
+       key.prs_id=id;
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* last chance */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /*
+    * Do not publish the new slot through last_prs before it is filled:
+    * init_prs() zeroes the slot and may then raise an error, and a
+    * last_prs pointing at such a zeroed, unused slot would satisfy a later
+    * lookup of id 0 with an entry that has no usable functions.
+    */
+   slot=&(PList.list[PList.len]);
+   PList.last_prs=NULL;
+   init_prs(id, slot);
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return findprs(id); /* qsort changed order!! */
+}
+
+/* saved SPI plan for the name -> oid query, prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Return the oid of the parser called "name".
+ *
+ * The name -> oid cache in PList is consulted first; a non-zero cached
+ * value is returned directly.  Otherwise pg_ts_parser is queried through
+ * SPI and the result is added to the cache.  Reports an error through
+ * ts_error(ERROR, ...) when the plan cannot be prepared, the query fails,
+ * or no parser with that name exists.
+ */
+Oid
+name2id_prs(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   Oid id=findSNMap_t( &(PList.name2id_map), name );
+   
+   if ( id ) 
+       return id;
+   
+
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 )
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   SPI_finish();
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* cross-call state of the token_type*() set-returning functions */
+typedef struct {
+   int     cur;        /* index of the next list entry to return */
+   LexDescr    *list;  /* array returned by the parser's lextype function */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type*() functions: call the
+ * lextype function of parser "prsid" and store its result, together with
+ * the tuple-building data for the "tokentype" relation, in funcctx.
+ * Everything allocated here lives in the multi-call memory context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   TypeStorage     *st;
+   WParserInfo *prs = findprs(prsid); 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->cur=0;
+   st->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
+   );
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the token_type*() functions.
+ *
+ * Returns the next (lexid, alias, descr) row as a tuple datum and frees
+ * the alias and descr strings of that entry.  An entry with lexid 0 ends
+ * the list: the stored state is freed and (Datum)0 is returned, which the
+ * callers treat as "no more rows".
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage     *st;
+
+   st=(TypeStorage*)funcctx->user_fctx;
+   if (  st->list && st->list[st->cur].lexid ) {
+       Datum result;
+       char* values[3];
+       char    txtid[16];
+       HeapTuple    tuple;
+
+       /* the tuple is built from C strings, so the id is printed first */
+       values[0]=txtid;
+       sprintf(txtid,"%d",st->list[st->cur].lexid);
+       values[1]=st->list[st->cur].alias;
+       values[2]=st->list[st->cur].descr;
+
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       pfree(values[1]);
+       pfree(values[2]);
+       st->cur++;
+       return result;
+   } else {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+   }
+   return (Datum)0;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: one row per token type of the parser whose oid
+ * is passed as argument 0.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) { 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call() means the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: one row per token type of the parser whose name
+ * is passed as argument 0.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call() means the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: one row per token type of the current parser.
+ * When no current parser has been selected yet, the parser named
+ * "default" is looked up and becomes the current one.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call() means the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+/*
+ * Make the parser with the given oid the current parser.  The parser is
+ * looked up first, so an unknown oid is reported before anything changes.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        Oid prsid=PG_GETARG_OID(0);
+
+        findprs(prsid);
+        current_parser_id=prsid;
+        PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+/*
+ * Make the parser with the given name the current parser: the name is
+ * resolved to an oid and handed to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+        Datum prsid=ObjectIdGetDatum( name2id_prs(name) );
+
+        DirectFunctionCall1( set_curprs, prsid );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+/* one parsed token: its type code and a NUL-terminated copy of its text */
+typedef struct {
+   int type;
+   char    *lexem;
+} LexemEntry;
+
+/* cross-call state of the parse*() set-returning functions */
+typedef struct {
+   int cur;            /* index of the next entry to return */
+   int len;            /* allocated size while parsing, then number of tokens */
+   LexemEntry  *list;  /* collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse*() functions: run parser "prsid"
+ * over "txt" and collect every token it returns.
+ *
+ * The parser's start function receives the text data and its length, the
+ * getlexeme function is called until it returns type 0, and the end
+ * function is called afterwards.  Each token is copied into its own
+ * NUL-terminated string.  The token list and the tuple-building data for
+ * the "tokenout" relation are stored in funcctx and allocated in the
+ * multi-call memory context.
+ *
+ * prsid is an Oid (it used to be declared int), matching setup_firstcall()
+ * and findprs().
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* grow the token array by doubling */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* lex is not NUL-terminated: copy llen bytes and terminate */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the number of tokens and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the parse*() functions.
+ *
+ * Returns the next (type, lexeme) row as a tuple datum and frees the
+ * lexeme string of that entry.  Once all tokens were returned the stored
+ * state is freed and (Datum)0 is returned, which the callers treat as
+ * "no more rows".
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *st;
+
+   st=(PrsStorage*)funcctx->user_fctx;
+   if (  st->cur < st->len ) {
+       Datum result;
+       char* values[2];
+       char    tid[16];
+       HeapTuple    tuple;
+
+       /* the tuple is built from C strings, so the type is printed first */
+       values[0]=tid;
+       sprintf(tid,"%d",st->list[st->cur].type);
+       values[1]=st->list[st->cur].lexem;
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       pfree(values[1]);
+       st->cur++;
+       return result;
+   } else {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+   }
+   return (Datum)0;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose oid is argument 0 and return one (type, lexeme) row per token.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* the whole text is tokenized here; later calls only emit rows */
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose name is argument 0 and return one (type, lexeme) row per token.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* the whole text is tokenized here; later calls only emit rows */
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 0 with the current
+ * parser and return one (type, lexeme) row per token.  When no current
+ * parser has been selected yet, the parser named "default" is looked up
+ * and becomes the current one.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * Build a headline for a text.
+ *
+ * Arguments: 0 - oid of the configuration, 1 - text, 2 - query,
+ * 3 - optional options text (absent or a NULL pointer means none).
+ * The text is parsed with hlparsetext(), the headline function of the
+ * configuration's parser then works on the parsed words, and genhl()
+ * produces the resulting text.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   /* headline_byname()/headline_current() pass a NULL pointer for "no options" */
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /*
+    * NOTE(review): startsel/stopsel are zeroed by the memset above and not
+    * set in this function; presumably the parser's headline function
+    * always allocates them - confirm, since pfree() of NULL is not safe.
+    */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+/*
+ * Same as headline(), but the configuration is given by name in
+ * argument 0.  A missing options argument is passed on as a NULL pointer.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg=PG_GETARG_TEXT_P(0);
+
+   Datum out=DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);   
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+/*
+ * Same as headline(), but uses the configuration returned by
+ * get_currcfg(); the text, query and optional options are arguments
+ * 0, 1 and 2.  A missing options argument is passed on as a NULL pointer.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   PG_RETURN_DATUM(DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
+   ));
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/*
+ * Description of one parser, filled by init_prs() from its pg_ts_parser
+ * row.
+ */
+typedef struct {
+   Oid prs_id;                 /* oid of the pg_ts_parser row */
+   FmgrInfo start_info;        /* prs_start function */
+   FmgrInfo getlexeme_info;    /* prs_nexttoken function */
+   FmgrInfo end_info;          /* prs_end function */
+   FmgrInfo headline_info;     /* prs_headline function */
+   Oid lextype;                /* oid of the prs_lextype function */
+   void *prs;                  /* state pointer returned by the start function */
+} WParserInfo;
+
+void init_prs(Oid id, WParserInfo *prs);
+WParserInfo* findprs(Oid id);
+Oid name2id_prs(text *name);
+void   reset_prs(void);
+
+
+/*
+ * One token type as reported by a parser's lextype function.  Such a
+ * function returns an array of these, ended by an entry with lexid 0.
+ */
+typedef struct {
+   int lexid;          /* token type code; 0 marks the end of the array */
+   char    *alias;     /* short name of the token type */
+   char    *descr;     /* human-readable description */
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * Build the token-type table of the default parser.
+ *
+ * Returns a palloc'd array of LASTNUM+1 LexDescr entries: entries 0..LASTNUM-1
+ * describe token types 1..LASTNUM (alias and description are pstrdup'ed from
+ * tok_alias[] and lex_descr[]), and the final entry has lexid 0 to mark the
+ * end of the list.
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr    *table;
+   int         id;
+
+   table = (LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+
+   id = 1;
+   while ( id <= LASTNUM ) {
+       LexDescr    *slot = &table[id-1];
+
+       slot->lexid = id;
+       slot->alias = pstrdup(tok_alias[id]);
+       slot->descr = pstrdup(lex_descr[id]);
+       id++;
+   }
+
+   /* terminator entry: only lexid is set */
+   table[LASTNUM].lexid=0;
+
+   PG_RETURN_POINTER(table);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+
+/*
+ * Begin parsing: hands the buffer (argument 0) and its int4 length
+ * (argument 1) to start_parse_str().  Always returns a NULL pointer.
+ */
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char    *buf = (char*)PG_GETARG_POINTER(0);
+   int     buflen = PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+
+/*
+ * Fetch the next token from the lexer.
+ *
+ * Calls tsearch2_yylex() and returns its result as the int4 token type.
+ * The current values of `token` and `tokenlen` are stored through the
+ * output pointers passed as arguments 1 and 2.  Argument 0 is not used.
+ */
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   int     lextype = tsearch2_yylex();
+   char    **tokout = (char**)PG_GETARG_POINTER(1);
+   int     *lenout = (int*)PG_GETARG_POINTER(2);
+
+   *tokout = token;
+   *lenout = tokenlen;
+
+   PG_RETURN_INT32(lextype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+
+/*
+ * Finish parsing: calls end_parse() and returns void.
+ * Argument 0 is accepted but not used.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS)
+{
+   end_parse();
+
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class predicates used when building headlines.  The numbers are
+ * token type ids of the default parser as listed by token_type('default'):
+ *   5 url, 7 sfloat, 8 version, 12 blank, 13 tag, 14 http, 15 hword,
+ *   16 lhword, 17 nlhword, 20 float, 21 int, 22 uint, 23 entity.
+ */
+/* blank (space symbols) */
+#define LEAVETOKEN(x)  ( (x)==12 )
+/* url or any of the hyphenated-word types */
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+/* blank (space symbols) */
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+/* tag, http head, blank or entity */
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* url, tag or hyphenated word: prsd_headline sets the "replace" flag on these */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* tokens that are not counted as words when measuring headline length */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* tokens a headline should not end on: non-words, numbers, versions and ignored tokens */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/*
+ * A window of headline words handed to TS_execute() through
+ * checkcondition_HL(): `words` points at the first word of the window and
+ * `len` is the number of words in it.
+ */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * TS_execute() callback: reports whether the query operand `val` is the
+ * item of any word inside the hlCheck window passed as `checkval`.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window = (hlCheck*)checkval;
+   int     k = 0;
+
+   while ( k < window->len ) {
+       if ( window->words[k].item == val )
+           return true;
+       k++;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next "cover": a range of words, starting at or after index *p,
+ * for which the query evaluates to true.
+ *
+ * On entry *p is the first word index to consider.  On success the function
+ * returns true and *p / *q hold the inclusive start / end word indexes of
+ * the cover.  It returns false when no query operand occurs at or after the
+ * starting position, or when no candidate range satisfies the query.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   /* -1 means "nothing found"; 0 is a valid word index and cannot be the sentinel */
+   *q=-1;
+   *p=0x7fffffff;
+
+   /* *q = the furthest of the first occurrences (from pos on) of each operand */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q<0 )
+       return false;
+
+   /* *p = the nearest to pos of the last occurrences (scanning back from *q) of each operand */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* candidate range does not satisfy the query: retry one word further on */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+
+/*
+ * Choose which words of a parsed document go into the headline.
+ *
+ * Arguments:
+ *   0 - HLPRSTEXT* with the parsed words; modified in place and returned
+ *   1 - text* options string, or NULL for defaults
+ *   2 - QUERYTYPE* query whose operands should be highlighted
+ *
+ * Recognised options (matched case-insensitively): MaxWords, MinWords,
+ * ShortWord, StartSel, StopSel.  Raises an error unless
+ * 0 < MinWords < MaxWords and ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is stretched or shrunk towards the
+ * MinWords..MaxWords limits, and the one containing the most distinct query
+ * words that also ends on an acceptable token is kept.  If the query has no
+ * cover at all, the headline is simply the first MinWords words.  Words of
+ * the chosen range get their in/selected/skip/replace flags set, and
+ * startsel/stopsel (with their lengths) are filled in.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* from opt + start and end tag */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words ; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           /*
+            * The counting loop above leaves i one past the last word it
+            * looked at, which can be q+1 (possibly == curwords).  Never
+            * start shrinking from beyond the end of the cover.
+            */
+           if ( i>q )
+               i=q;
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this cover if it is the first one, if it holds more query
+        * words and ends well, or if it ends well while the current best
+        * does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: fall back to the first min_words words of the text */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* documented defaults: StartSel=<b>, StopSel=</b> */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom}}}}
+>for₁♦_{12
+>well₁♦_{12
+>over₁♦_{12
+>100₂₂♦_{12
+>feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')

+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and

+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+}

diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html

new file mode 100644 (file)

index 0000000..df0faa4


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-ref.html
@@ -0,0 +1,448 @@
+
+
+
+
+tsearch2 reference
+
+
+The tsearch2 Reference
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Reference documents the user types and functions
+of the tsearch2 module for PostgreSQL.
+An introduction to the module is provided
+by the tsearch2 Guide,
+a companion document to this one.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+Vectors and Queries
+
+Vectors and queries both store lexemes,
+but for different purposes.
+A tsvector stores the lexemes
+of the words that are parsed out of a document,
+and can also remember the position of each word.
+A tsquery specifies a boolean condition among lexemes.
+
+Any of the following functions with a configuration argument
+can use either an integer id or textual ts_name
+to select a configuration;
+if the option is omitted, then the current configuration is used.
+For more information on the current configuration,
+read the next section on Configurations.
+
+Vector Operations
+
+
+
+ to_tsvector( [configuration,]

+ document TEXT) RETURNS tsvector
+
+ Parses a document into tokens,
+ reduces the tokens to lexemes,
+ and returns a tsvector which lists the lexemes
+ together with their positions in the document.
+ For the best description of this process,
+ see the section on Parsing and Stemming
+ in the accompanying tsearch2 Guide.
+
+ strip(vector tsvector) RETURNS tsvector
+
+ Return a vector which lists the same lexemes
+ as the given vector,
+ but which lacks any information
+ about where in the document each lexeme appeared.
+ While the returned vector is thus useless for relevance ranking,
+ it will usually be much smaller.
+
+ setweight(vector tsvector, letter) RETURNS tsvector
+
+ This function returns a copy of the input vector
+ in which every location has been labelled
+ with either the letter
+ 'A', 'B', or 'C',
+ or the default label 'D'
+ (which is the default with which new vectors are created,
+ and as such is usually not displayed).
+ These labels are retained when vectors are concatenated,
+ allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+
+ vector1 || vector2
+
+ concat(vector1 tsvector, vector2 tsvector)

+ RETURNS tsvector
+
+ Returns a vector which combines the lexemes and position information
+ in the two vectors given as arguments.
+ Position weight labels (described in the previous paragraph)
+ are retained intact during the concatenation.
+ This has at least two uses.
+ First,
+ if some sections of your document
+ need be parsed with different configurations than others,
+ you can parse them separately
+ and concatenate the resulting vectors into one.
+ Second,
+ you can weight words from some sections of your document
+ more heavily than those from others by:
+ parsing the sections into separate vectors;
+ assigning the vectors different position labels
+ with the setweight() function;
+ concatenating them into a single vector;
+ and then providing a weights argument
+ to the rank() function
+ that assigns different weights to positions with different labels.
+
+ tsvector_size(vector tsvector) RETURNS INT4
+
+ Returns the number of lexemes stored in the vector.
+
+ text::tsvector RETURNS tsvector
+
+ Directly casting text to a tsvector
+ allows you to directly inject lexemes into a vector,
+ with whatever positions and position weights you choose to specify.
+ The text should be formatted
+ like the vector would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Query Operations
+
+
+
+ to_tsquery( [configuration,]

+ querytext text) RETURNS tsquery
+
+ Parses a query,
+ which should be single words separated by the boolean operators
+ “&” and,
+ “|” or,
+ and “!” not,
+ which can be grouped using parentheses.
+ Each word is reduced to a lexeme using the current
+ or specified configuration.
+
+
+ querytree(query tsquery) RETURNS text
+
+ This might return a textual representation of the given query.
+
+ text::tsquery RETURNS tsquery
+
+ Directly casting text to a tsquery
+ allows you to directly inject lexemes into a query,
+ with whatever positions and position weight flags you choose to specify.
+ The text should be formatted
+ like the query would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Configurations
+
+A configuration specifies all of the equipment necessary
+to transform a document into a tsvector:
+the parser that breaks its text into tokens,
+and the dictionaries which then transform each token into a lexeme.
+Every call to to_tsvector() (described above)
+uses a configuration to perform its processing.
+Three configurations come with tsearch2:
+
+
+default — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the simple dictionary for all others.
+default_russian — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the ru_stem Russian Snowball dictionary for all others.
+simple — Processes both words and numbers
+ with the simple dictionary,
+ which neither discards any stop words nor alters them.
+
+
+The tsearch2 module initially chooses your current configuration
+by looking for your current locale in the locale field
+of the pg_ts_cfg table described below.
+You can manipulate the current configuration yourself with these functions:
+
+
+
+ set_curcfg( id INT | ts_name TEXT

+  ) RETURNS VOID
+
+ Set the current configuration used by to_tsvector
+ and to_tsquery.
+
+ show_curcfg() RETURNS INT4
+
+ Returns the integer id of the current configuration.
+
+
+
+Each configuration is defined by a record in the pg_ts_cfg table:
+
+create table pg_ts_cfg (
+   id      int not  null primary key,
+   ts_name     text not null,
+   prs_name    text not null,
+   locale      text
+);
+
+The id and ts_name are unique values
+which identify the configuration;
+the prs_name specifies which parser the configuration uses.
+Once this parser has split document text into tokens,
+the type of each resulting token —
+or, more specifically, the type's lex_alias
+as specified in the parser's lexem_type() table —
+is searched for together with the configuration's ts_name
+in the pg_ts_cfgmap table:
+
+create table pg_ts_cfgmap (
+   ts_name     text not null,
+   lex_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,lex_alias)
+);
+
+Those tokens whose types are not listed are discarded.
+The remaining tokens are assigned integer positions,
+starting with 1 for the first token in the document,
+and turned into lexemes with the help of the dictionaries
+whose names are given in the dict_name array for their type.
+These dictionaries are tried in order,
+stopping either with the first one to return a lexeme for the token,
+or discarding the token if no dictionary returns a lexeme for it.
+
+Parsers
+
+Each parser is defined by a record in the pg_ts_parser table:
+
+create table pg_ts_parser (
+   prs_id      int not null primary key,
+   prs_name    text not null,
+   prs_start   oid not null,
+   prs_getlexem    oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+);
+
+The prs_id and prs_name uniquely identify the parser,
+while prs_comment usually describes its name and version
+for the reference of users.
+The other items identify the low-level functions
+which make the parser operate,
+and are only of interest to someone writing a parser of their own.
+
+The tsearch2 module comes with one parser named default
+which is suitable for parsing most plain text and HTML documents.
+
+Each parser argument below
+must designate a parser with either an integer prs_id
+or a textual prs_name;
+the current parser is used when this argument is omitted.
+
+
+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID
+
+ Selects a current parser
+ which will be used when any of the following functions
+ are called without a parser as an argument.
+
+ CREATE FUNCTION lexem_type(

+  [ parser ]
+  ) RETURNS SETOF lexemtype
+
+ Returns a table which defines and describes
+ each kind of token the parser may produce as output.
+ For each token type the table gives the lexid
+ which the parser will label each token of that type,
+ the alias which names the token type,
+ and a short description descr for the user to read.
+
+ CREATE FUNCTION parse(

+  [ parser, ] document TEXT
+  ) RETURNS SETOF lexemtype
+
+ Parses the given document and returns a series of records,
+ one for each token produced by parsing.
+ Each token includes a lexid giving its type
+ and a lexem which gives its content.
+
+
+Dictionaries
+
+Dictionaries take textual tokens as input,
+usually those produced by a parser,
+and return lexemes which are usually some reduced form of the token.
+Among the dictionaries which come installed with tsearch2 are:
+
+
+simple simply folds uppercase letters to lowercase
+ before returning the word.
+en_stem runs an English Snowball stemmer on each word
+ that attempts to reduce the various forms of a verb or noun
+ to a single recognizable form.
+ru_stem runs a Russian Snowball stemmer on each word.
+
+
+Each dictionary is defined by an entry in the pg_ts_dict table:
+
+CREATE TABLE pg_ts_dict (
+   dict_id     int not null primary key,
+   dict_name   text not null,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lemmatize  oid not null,
+   dict_comment    text
+);
+
+The dict_id and dict_name
+serve as unique identifiers for the dictionary.
+The meaning of the dict_initoption varies among dictionaries,
+but for the built-in Snowball dictionaries
+it specifies a file from which stop words should be read.
+The dict_comment is a human-readable description of the dictionary.
+The other fields are internal function identifiers
+useful only to developers trying to implement their own dictionaries.
+
+The argument named dictionary
+in each of the following functions
+should be either an integer dict_id or a textual dict_name
+identifying which dictionary should be used for the operation;
+if omitted then the current dictionary is used.
+
+
+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID
+
+ Selects a current dictionary for use by functions
+ that do not select a dictionary explicitly.
+
+ CREATE FUNCTION lexize(

+ [ dictionary, ] word text)
+ RETURNS TEXT[]
+
+ Reduces a single word to a lexeme.
+ Note that lexemes are arrays of zero or more strings,
+ since in some languages there might be several base words
+ from which an inflected form could arise.
+
+
+Ranking
+
+Ranking attempts to measure how relevant documents are to particular queries
+by inspecting the number of times each search word appears in the document,
+and whether different search terms occur near each other.
+Note that this information is only available in unstripped vectors —
+ranking functions will only return a useful result
+for a tsvector which still has position information!
+
+Both of these ranking functions
+take an integer normalization option
+that specifies whether a document's length should impact its rank.
+This is often desirable,
+since a hundred-word document with five instances of a search word
+is probably more relevant than a thousand-word document with five instances.
+The option can have the values:
+
+
+0 (the default) ignores document length.
+1 divides the rank by the logarithm of the length.
+2 divides the rank by the length itself.
+
+
+The two ranking functions currently available are:
+
+
+
+ CREATE FUNCTION rank(

+  [ weights float4[], ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS,
+ and offers the ability to weight word instances more heavily
+ depending on how you have classified them.
+ The weights specify how heavily to weight each category of word:
+ 
+ {D-weight, C-weight, B-weight, A-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(

+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “
+>Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or less.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+


diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b


--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | [email protected]
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | [email protected]
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 |  
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 |  
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 |  
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 |  
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+


diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496


--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+# Makefile template for a generated dictionary module.  config.sh fills in
+# the CFG_* placeholders and rewrites the PG_CPPFLAGS line.
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+# Left empty here; config.sh replaces this whole line with the include paths.
+PG_CPPFLAGS =
+# The generated module links against the tsearch2 library archive that is
+# expected in the sibling contrib/tsearch2 directory.
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+
+include $(top_srcdir)/contrib/contrib-global.mk


diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7


--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility helps people create dictionaries for the contrib/tsearch2
+module. In particular, it has built-in support for Snowball stemmers.
+
+Programming API to tsearch2 dictionaries is described in tsearch v2 
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument of the -p option must be *the same* as the name of
+      the stemming function in stem.c (without the _stem suffix).
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample portuguese words with the stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+   Note that the dictionary and the corresponding entry in the tsearch
+   configuration are now installed, and you may modify them using
+   plain SQL commands, for example to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercase input word and remove it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please, check Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for additional information about "Gendict tutorial" and dictionaries.
\ No newline at end of file


diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542


--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRL/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+dictname=
+stemmode=no
+verbose=no
+cfile=
+hfile=
+dir= 
+hasinit=no
+comment=
+prefix=
+
+while getopts n:c:C:h:d:p:vis opt
+do
+   case "$opt" in
+       v) verbose=yes;;
+       s) stemmode=yes;;
+       i) hasinit=yes;;
+       n) dictname="$OPTARG";;
+       c) cfile="$OPTARG";;
+       h) hfile="$OPTARG";;
+       d) dir="$OPTARG";;
+       C) comment="$OPTARG";;
+       p) prefix="$OPTARG";;
+       \?) usage;;
+   esac
+done
+
+[ ${#dictname} -eq 0 ] && usage
+
+dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'`
+
+if [ $stemmode = "yes" ] ; then 
+   [ ${#prefix} -eq 0 ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi 
+
+[ ${#dir}   -eq 0 ] && dir="dict_$dictname"
+
+if [ ${#comment} -eq 0 ]; then
+   comment=null
+else
+   comment="'$comment'"
+fi
+
+ofile=
+for f in $cfile
+do
+   f=` echo $f | sed 's#c$#o#'`
+   ofile="$ofile $f"
+done
+
+if [ $stemmode = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+if [ $verbose = "yes" ]; then
+   echo Dictname: "'"$dictname"'"
+   echo Snowball stemmer: $stemmode
+   echo Has init method: $hasinit
+   [ $stemmode = "yes" ] && echo Function prefix: $prefix 
+   echo Source files: $cfile
+   echo Header files: $hfile
+   echo Object files: $ofile
+   echo Comment: $comment
+   echo Directory: ../../$dir
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build directory...  '
+if [ ! -d ../../$dir ]; then
+   if ! mkdir ../../$dir ; then 
+       echo "Can't create directory ../../$dir"
+       exit 1
+   fi 
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+else
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+fi
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+   [ $verbose = "yes" ] && echo -n 'Copy source and header files...  '
+   if [ ${#cfile} -ne 0 ] ; then
+       if ! cp $cfile ../../$dir ; then 
+           echo "Cant cp all or one of files: $cfile"
+           exit 1
+       fi
+   fi
+   if [ ${#hfile} -ne 0 ] ; then 
+       if ! cp $hfile ../../$dir ; then 
+               echo "Cant cp all or one of files: $hfile"
+           exit 1
+       fi
+   fi
+   [ $verbose = "yes" ] && echo ok
+fi
+
+
+# Build subinclude.h: one #include line per header file given with -h.
+[ $verbose = "yes" ] && echo -n 'Build sub-include header...  '
+# Create/truncate the header with the null command.  "echo -n" is not
+# portable: an echo that does not understand -n would write a literal
+# "-n" line into the generated C header.
+: > ../../$dir/subinclude.h
+for i in $hfile
+do
+   echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+if  [ $stemmode = "yes" ] ; then 
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   [ $verbose = "yes" ] && echo -n 'Build dictinonary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else 
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi 
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n "Build README.$dictname...  "
+if  [ $stemmode = "yes" ] ; then
+   echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname
+else
+   echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname
+fi
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+


diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/* Per-dictionary state allocated and filled in by the init function. */
+typedef struct {
+   struct SN_env *z;                /* environment returned by the stemmer's create_env() */
+   StopList    stoplist;            /* stop list; loaded only when an init argument is given */
+   int (*stem)(struct SN_env * z);  /* stemming routine operating on z */
+} DictSnowball;
+
+
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * Dictionary init method.
+ *
+ * Allocates a zeroed DictSnowball with malloc(), optionally loads and sorts
+ * the stop list from argument 0 (when it is neither SQL NULL nor a NULL
+ * pointer), creates the stemmer environment and records the stem function.
+ * Returns the DictSnowball pointer; raises ERROR on allocation failure.
+ */
+Datum 
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+       
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       freestoplist(&(d->stoplist));
+       /*
+        * elog(ERROR) does not return, and d came from malloc(), so it must
+        * be released here or it is leaked.
+        */
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+


diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+/* Dictionary state (HASINIT-marked lines): just the stop word list */
+HASINIT typedef struct {
+HASINIT    StopList    stoplist;
+HASINIT } DictExample;
+
+
+/*
+ * Init method of the template dictionary (all of it is on HASINIT-marked
+ * lines).  Allocates a zeroed DictExample with malloc, and if argument 0
+ * (text) is not NULL loads and sorts the stop word list from it.
+ * Returns the DictExample pointer; raises elog(ERROR) if malloc fails.
+ */
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT 
+HASINIT    if ( !d )
+HASINIT        elog(ERROR, "No memory");
+HASINIT    memset(d,0,sizeof(DictExample));
+HASINIT 
+HASINIT    d->stoplist.wordop=lowerstr;
+HASINIT    
+HASINIT    /* Your INIT code */
+HASINIT    
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+HASINIT        text       *in = PG_GETARG_TEXT_P(0);
+HASINIT        readstoplist(in, &(d->stoplist));
+HASINIT        sortstoplist(&(d->stoplist));
+HASINIT        PG_FREE_IF_COPY(in, 0);
+HASINIT    }
+HASINIT 
+HASINIT    PG_RETURN_POINTER(d);
+HASINIT }
+
+/*
+ * Lexize method of the template dictionary.
+ *
+ * Argument 1 is the input word (char *), argument 2 its length (int32); on
+ * HASINIT-marked lines argument 0 is the DictExample built by the init method.
+ * Returns a palloc'ed, NULL-terminated array of at most one lexeme: a copy of
+ * the word, or (HASINIT variant) nothing when the word is empty or found in
+ * the stop list.
+ */
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+   char    **res=palloc(sizeof(char*)*2);
+
+   /* Your INIT dictionary code */
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0]=NULL;
+HASINIT    } else 
+       res[0]=txt;
+   res[1]=NULL;
+
+   PG_RETURN_POINTER(res);
+}


diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842


--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+-- Template of the SQL script that registers a generated dictionary.
+-- Lines carry marker prefixes (HASINIT, NOINIT, NOSNOWBALL, ISSNOWBALL) and the
+-- placeholders CFG_MODNAME / CFG_COMMENT; presumably the generator script keeps
+-- or drops the marked lines and substitutes the placeholders (confirm there).
+SET search_path = public;
+BEGIN;
+
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+-- One pg_ts_dict row: name, init function oid (or null), a null column,
+-- lexize function oid, and the comment.
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;


diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/* Key (as GISTTYPE *) of the pos'th GISTENTRY stored in the varlena 'vec' */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/* Number of set bits among bits 0..7 of 'val' */
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/* Input function stub: always raises elog(ERROR, "Not implemented") */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/* Output function stub: always raises elog(ERROR, "Not implemented") */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/* qsort() comparator ordering int4 values ascending */
+static int
+compareint(const void *a, const void *b)
+{
+   int4        lhs = *((int4 *) a);
+   int4        rhs = *((int4 *) b);
+
+   if (lhs > rhs)
+       return 1;
+   if (lhs < rhs)
+       return -1;
+   return 0;
+}
+
+/*
+ * Sort the int4 array 'a' of length 'l' in place and squeeze out duplicates.
+ *
+ * Returns the number of distinct values, which then occupy a[0..result-1].
+ * Arrays with fewer than two elements are returned untouched.  The previous
+ * "l == 1" test let l == 0 fall through to the final "res + 1 - a" and
+ * report one element for an empty array.
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+   int4       *ptr,
+              *res;
+
+   if (l <= 1)
+       return l;
+
+   ptr = res = a;
+
+   qsort((void *) a, l, sizeof(int4), compareint);
+
+   /* 'res' points at the last unique value kept so far */
+   while (ptr - a < l)
+       if (*ptr != *res)
+           *(++res) = *ptr++;
+       else
+           ptr++;
+   return res + 1 - a;
+}
+
+/*
+ * Build the signature of an array key: clear 'sign', then set the bit
+ * selected by HASH() for every element of a's array.
+ */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+   int4        nelem = ARRNELEM(a);
+   int4       *cur = GETARR(a);
+   int4       *stop = cur + nelem;
+
+   MemSet((void *) sign, 0, sizeof(BITVEC));
+   while (cur < stop)
+   {
+       HASH(sign, *cur);
+       cur++;
+   }
+}
+
+/*
+ * GiST compress method.
+ *
+ * Leaf entry (key is a tsvector): every word is hashed with crc32_sz() into
+ * an int4 array key (ARRKEY), which is sorted and de-duplicated by
+ * uniqueint(); if the resulting key is larger than TOAST_INDEX_TARGET it is
+ * replaced by a signature key (SIGNKEY).
+ * Non-leaf signature key that is not yet ALLISTRUE: if every signature byte
+ * is 0xff it is replaced by a header-only SIGNKEY|ALLISTRUE key.
+ * In every other case the incoming entry is returned unchanged.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       /* one crc32 hash per word of the tsvector */
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* free the detoasted copy, if PG_DETOAST_DATUM made one */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /* any byte that is not 0xff: keep the entry as it is */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * GiST decompress method: detoast the key.  The incoming entry is returned
+ * as is unless detoasting produced a new copy, in which case a fresh
+ * GISTENTRY wrapping that copy is returned.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+   GISTENTRY  *retval;
+
+   /* same pointer back means nothing had to be detoasted */
+   if (key == (GISTTYPE *) DatumGetPointer(entry->key))
+   {
+       PG_RETURN_POINTER(entry);
+   }
+
+   retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+   gistentryinit(*retval, PointerGetDatum(key),
+                 entry->rel, entry->page,
+                 entry->offset, key->len, FALSE);
+
+   PG_RETURN_POINTER(retval);
+}
+
+/* Half-open range [arrb, arre) of a sorted int4 array */
+typedef struct
+{
+   int4       *arrb;
+   int4       *arre;
+}  CHKVAL;
+
+/*
+ * Binary search: does the array described by 'checkval' (a CHKVAL)
+ * contain val->val ?
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+   CHKVAL     *range = (CHKVAL *) checkval;
+   int4       *lo = range->arrb;
+   int4       *hi = range->arre;
+
+   /* candidates always live in [lo, hi) */
+   while (lo < hi)
+   {
+       int4       *mid = lo + (hi - lo) / 2;
+
+       if (*mid < val->val)
+           lo = mid + 1;
+       else if (*mid > val->val)
+           hi = mid;
+       else
+           return (true);
+   }
+
+   return (false);
+}
+
+/*
+ * Signature counterpart of checkcondition_arr: 'checkval' is a signature and
+ * the result is the bit selected by HASHVAL(val->val).
+ */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+   return GETBIT(checkval, HASHVAL(val->val));
+}
+
+/*
+ * GiST consistent method.
+ *
+ * Argument 0 is the GISTENTRY holding the index key, argument 1 the query.
+ * An empty query never matches.  For a signature key the answer is true when
+ * the key is ALLISTRUE, otherwise the query is evaluated by TS_execute()
+ * against the signature bits; for an array key it is evaluated against the
+ * array of hashes.  The boolean passed as TS_execute()'s third argument
+ * differs between the two cases (false for signatures, true for arrays);
+ * its meaning is defined by TS_execute(), which is not part of this file.
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(
+                               ((GISTENTRY *) PG_GETARG_POINTER(0))->key
+   );
+
+   if (!query->size)
+       PG_RETURN_BOOL(false);
+
+   if (ISSIGNKEY(key))
+   {
+       if (ISALLTRUE(key))
+           PG_RETURN_BOOL(true);
+
+       PG_RETURN_BOOL(TS_execute(
+                              GETQUERY(query),
+                              (void *) GETSIGN(key), false,
+                              checkcondition_bit
+                              ));
+   }
+   else
+   {                           /* only leaf pages */
+       CHKVAL      chkval;
+
+       chkval.arrb = GETARR(key);
+       chkval.arre = chkval.arrb + ARRNELEM(key);
+       PG_RETURN_BOOL(TS_execute(
+                              GETQUERY(query),
+                              (void *) &chkval, true,
+                              checkcondition_arr
+                              ));
+   }
+}
+
+/*
+ * Merge key 'add' into the signature 'sbase'.
+ * Returns 1 (leaving sbase untouched) when 'add' is an ALLISTRUE signature
+ * key, 0 otherwise.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+   int4        i;
+   BITVECP     sadd;
+
+   if (!ISSIGNKEY(add))
+   {
+       /* array key: hash every element into the signature */
+       int4       *elems = GETARR(add);
+
+       for (i = 0; i < ARRNELEM(add); i++)
+           HASH(sbase, elems[i]);
+       return 0;
+   }
+
+   sadd = GETSIGN(add);
+   if (ISALLTRUE(add))
+       return 1;
+
+   /* plain signature: bitwise OR, byte by byte */
+   LOOPBYTE(
+            sbase[i] |= sadd[i];
+   );
+   return 0;
+}
+
+
+/*
+ * GiST union method.
+ *
+ * Argument 0 is a bytea holding an array of GISTENTRY, argument 1 receives
+ * the size of the result.  All keys are OR-ed into a local signature with
+ * unionkey(); the result is always a signature key, and a header-only
+ * ALLISTRUE key as soon as one input key is ALLISTRUE.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   int        *size = (int *) PG_GETARG_POINTER(1);
+   BITVEC      base;
+   int4        len = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+   int4        i;
+   int4        flag = 0;
+   GISTTYPE   *result;
+
+   MemSet((void *) base, 0, sizeof(BITVEC));
+   for (i = 0; i < len; i++)
+   {
+       if (unionkey(base, GETENTRY(entryvec, i)))
+       {
+           flag = ALLISTRUE;
+           break;
+       }
+   }
+
+   flag |= SIGNKEY;
+   len = CALCGTSIZE(flag, 0);
+   result = (GISTTYPE *) palloc(len);
+   *size = result->len = len;
+   result->flag = flag;
+   /* an ALLISTRUE key carries no signature body */
+   if (!ISALLTRUE(result))
+       memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * GiST same method: store in *result (argument 2) whether keys a and b
+ * (arguments 0 and 1) are equal, and return the result pointer.
+ *
+ * Signature keys are equal when both are ALLISTRUE or when neither is and
+ * all signature bytes match; array keys are equal when they have the same
+ * number of elements and the same elements in the same order.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+   GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+   GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+   bool       *result = (bool *) PG_GETARG_POINTER(2);
+
+   if (ISSIGNKEY(a))
+   {                           /* then b also ISSIGNKEY */
+       if (ISALLTRUE(a) && ISALLTRUE(b))
+           *result = true;
+       else if (ISALLTRUE(a))
+           *result = false;
+       else if (ISALLTRUE(b))
+           *result = false;
+       else
+       {
+           int4        i;
+           BITVECP     sa = GETSIGN(a),
+                       sb = GETSIGN(b);
+
+           *result = true;
+           /* the break leaves the for loop that LOOPBYTE expands to */
+           LOOPBYTE(
+                    if (sa[i] != sb[i])
+                    {
+               *result = false;
+               break;
+           }
+           );
+       }
+   }
+   else
+   {                           /* a and b ISARRKEY */
+       int4        lena = ARRNELEM(a),
+                   lenb = ARRNELEM(b);
+
+       if (lena != lenb)
+           *result = false;
+       else
+       {
+           int4       *ptra = GETARR(a),
+                      *ptrb = GETARR(b);
+           int4        i;
+
+           *result = true;
+           for (i = 0; i < lena; i++)
+               if (ptra[i] != ptrb[i])
+               {
+                   *result = false;
+                   break;
+               }
+       }
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/* Count the bits set in the signature 'sign' */
+static int4
+sizebitvec(BITVECP sign)
+{
+   int4        i;
+   int4        nbits = 0;
+
+   /* add up the population count of every signature byte */
+   LOOPBYTE(
+       nbits += SUMBIT(sign[i]);
+   );
+   return nbits;
+}
+
+/* Hamming distance between two signatures: number of differing bits */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+   int i;
+   int ndiff = 0;
+
+   LOOPBIT(
+       ndiff += ( GETBIT(a,i) != GETBIT(b,i) ) ? 1 : 0;
+   );
+   return ndiff;
+}
+
+/*
+ * Hamming distance between two signature keys.  An ALLISTRUE key is treated
+ * as a signature with every bit set, so its distance to a regular signature
+ * is the number of bits that signature has clear.
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+   if ( ISALLTRUE(a) && ISALLTRUE(b) )
+       return 0;
+   if ( ISALLTRUE(a) )
+       return SIGLENBIT-sizebitvec(GETSIGN(b));
+   if ( ISALLTRUE(b) )
+       return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+   return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * GiST penalty method: write into *penalty (argument 2) the cost of adding
+ * newentry (argument 1) below origentry (argument 0) and return the pointer.
+ *
+ * An array key is first turned into a signature.  The cost is the Hamming
+ * distance to the original signature, except for an array key against an
+ * ALLISTRUE original, where it is the count of clear bits in the new
+ * signature divided by SIGLENBIT+1.
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+   GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+   float      *penalty = (float *) PG_GETARG_POINTER(2);
+   GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+   GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+   BITVECP     orig = GETSIGN(origval);
+
+   *penalty = 0.0;
+
+   if (ISARRKEY(newval)) {
+       BITVEC sign;
+       makesign(sign, newval);
+
+       if ( ISALLTRUE(origval) ) 
+           *penalty=((float)(SIGLENBIT-sizebitvec(sign)))/(float)(SIGLENBIT+1);
+       else 
+           *penalty=hemdistsign(sign,orig);
+   } else {
+       *penalty=hemdist(origval,newval);
+   }
+   PG_RETURN_POINTER(penalty);
+}
+
+/* Signature form of a key, cached while splitting a page */
+typedef struct
+{
+   bool        allistrue;
+   BITVEC      sign;
+}  CACHESIGN;
+
+/*
+ * Fill 'item' from 'key': array keys are hashed into a signature, ALLISTRUE
+ * keys only set the flag (sign is left untouched), regular signature keys
+ * are copied.
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+   if (ISARRKEY(key))
+   {
+       item->allistrue = false;
+       makesign(item->sign, key);
+       return;
+   }
+
+   if (ISALLTRUE(key))
+   {
+       item->allistrue = true;
+       return;
+   }
+
+   item->allistrue = false;
+   memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+}
+
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+/* Entry position on the page together with its split cost */
+typedef struct
+{
+   OffsetNumber pos;
+   int4        cost;
+} SPLITCOST;
+
+/* qsort() comparator ordering SPLITCOST items by ascending cost */
+static int
+comparecost(const void *a, const void *b)
+{
+   int4        lcost = ((SPLITCOST *) a)->cost;
+   int4        rcost = ((SPLITCOST *) b)->cost;
+
+   if (lcost > rcost)
+       return 1;
+   if (lcost < rcost)
+       return -1;
+   return 0;
+}
+
+
+/* Same as hemdist(), but for cached signatures */
+static int
+hemdistcache(CACHESIGN   *a, CACHESIGN   *b) {
+   if ( a->allistrue && b->allistrue )
+       return 0;
+   if ( a->allistrue )
+       return SIGLENBIT-sizebitvec(b->sign);
+   if ( b->allistrue )
+       return SIGLENBIT-sizebitvec(a->sign);
+
+   return hemdistsign( a->sign, b->sign );
+}
+
+/*
+ * GiST picksplit method: distribute the entries of 'entryvec' (argument 0)
+ * between the left and right halves of the GIST_SPLITVEC 'v' (argument 1),
+ * which is also the return value.
+ *
+ * Steps:
+ *  1. cache a signature for every entry and pick as seeds the pair with the
+ *     largest Hamming distance (falling back to entries 1 and 2);
+ *  2. start the left/right union keys from the two seeds;
+ *  3. order all entries with comparecost() on |dist(seed_1) - dist(seed_2)|;
+ *  4. put each entry on the side whose union it is closer to, with WISH_F()
+ *     biasing the choice by the current sizes of both sides, and OR its
+ *     signature into that union.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+   OffsetNumber k,
+               j;
+   GISTTYPE   *datum_l,
+              *datum_r;
+   BITVECP     union_l,
+               union_r;
+   int4        size_alpha,
+               size_beta;
+   int4        size_waste,
+               waste = -1;
+   int4        nbytes;
+   OffsetNumber seed_1 = 0,
+               seed_2 = 0;
+   OffsetNumber *left,
+              *right;
+   OffsetNumber maxoff;
+   BITVECP     ptr;
+   int         i;
+   CACHESIGN  *cache;
+   SPLITCOST  *costvector;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+   cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+   fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+   /* seeds: the two entries that are farthest apart */
+   for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+       for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+           /* the cache is filled lazily during the first outer pass */
+           if (k == FirstOffsetNumber)
+               fillcache(&cache[j], GETENTRY(entryvec, j));
+
+           size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+           if (size_waste > waste) {
+               waste = size_waste;
+               seed_1 = k;
+               seed_2 = j;
+           }
+       }
+   }
+
+   left = v->spl_left;
+   v->spl_nleft = 0;
+   right = v->spl_right;
+   v->spl_nright = 0;
+
+   if (seed_1 == 0 || seed_2 == 0) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
+   /* form initial .. */
+   if (cache[seed_1].allistrue) {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_l->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_l->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+   }
+   if (cache[seed_2].allistrue) {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_r->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_r->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+   }
+
+   union_l=GETSIGN(datum_l);
+   union_r=GETSIGN(datum_r);
+   maxoff = OffsetNumberNext(maxoff);
+   fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+   /* sort before ... */
+   costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+   for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+       costvector[j - 1].pos = j;
+       size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+       size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+       costvector[j - 1].cost = abs(size_alpha - size_beta);
+   }
+   qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+   for (k = 0; k < maxoff; k++) {
+       j = costvector[k].pos;
+       if (j == seed_1) {
+           *left++ = j;
+           v->spl_nleft++;
+           continue;
+       } else if (j == seed_2) {
+           *right++ = j;
+           v->spl_nright++;
+           continue;
+       }
+
+       /*
+        * Distance of entry j to each union.  cache[j].sign is a bare
+        * signature, not a GISTTYPE, so it must be passed to sizebitvec()
+        * directly: wrapping it in GETSIGN() would skip GTHDRSIZE bytes and
+        * count bits past the end of the signature.
+        */
+       if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+               size_alpha=0;
+           else
+               size_alpha = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign
+               );
+       } else {
+           size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+       }
+
+       if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+               size_beta=0;
+           else
+               size_beta = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign
+               );
+       } else {
+           size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+       }
+
+       if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+           if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+               /* an ALLISTRUE union has no signature body to update */
+               if (! ISALLTRUE(datum_l) )
+                   MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_l[i] |= ptr[i];
+               );
+           }
+           *left++ = j;
+           v->spl_nleft++;
+       } else {
+           if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_r) )
+                   MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_r[i] |= ptr[i];
+               );
+           }
+           *right++ = j;
+           v->spl_nright++;
+       }
+   }
+
+   /* mark the end of both offset lists */
+   *right = *left = FirstOffsetNumber;
+   pfree(costvector);
+   pfree(cache);
+   v->spl_ldatum = PointerGetDatum(datum_l);
+   v->spl_rdatum = PointerGetDatum(datum_r);
+
+   PG_RETURN_POINTER(v);
+}


diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+#define BITBYTE 8
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+#define LOOPBYTE(a) \
+       for(i=0;i
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i
+                               a;\
+               }
+
+/*
+ * Bit access on a signature 'x' addressed by bit number 'i':
+ * GETBYTE yields the byte holding bit i, CLRBIT/SETBIT/GETBIT clear, set and
+ * read that bit.  GETBITBYTE reads bit i of a single byte value.
+ */
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+#define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+/* HASHVAL maps a value to a bit number; HASH sets that bit in 'sign' */
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ */
+/*
+ * len  - total size of the key in bytes, header included
+ * flag - combination of ARRKEY / SIGNKEY / ALLISTRUE
+ * data - start of the payload: an int4 array (ARRKEY), a signature (SIGNKEY)
+ *        or nothing at all (SIGNKEY | ALLISTRUE), see CALCGTSIZE
+ */
+typedef struct
+{
+   int4        len;
+   int4        flag;
+   char        data[1];
+}  GISTTYPE;
+
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+#define ISARRKEY(x) ( ((GISTTYPE*)x)->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)x)->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)x)->flag & ALLISTRUE )
+
+/* header size, and full key size for the given flag and array length */
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+/* payload accessors; all of them expect a pointer to a whole GISTTYPE */
+#define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)x+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)x)->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "spell.h"
+
+#define MAXNORMLEN 56
+
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/* qsort() comparator: order SPELL entries by their word (strcmp order) */
+static int cmpspell(const void *s1,const void *s2){
+   const SPELL *lhs = (const SPELL*)s1;
+   const SPELL *rhs = (const SPELL*)s2;
+
+   return(strcmp(lhs->word,rhs->word));
+}
+
+/* Lower-case the NUL-terminated string 'str' in place, byte by byte */
+static void
+strlower( char * str ) {
+   unsigned char *p;
+
+   for (p = (unsigned char *)str; *p; p++)
+       *p = tolower( *p );
+}
+
+/*
+ * Backward string compare for suffix tree operations: the strings are
+ * compared from their last character towards the first; when one string is
+ * a tail of the other, the shorter one sorts first.
+ */
+static int
+strbcmp(const char *s1, const char *s2) {
+   int i1 = strlen(s1)-1;
+   int i2 = strlen(s2)-1;
+
+   for (; i1 >= 0 && i2 >= 0; i1--, i2--) {
+       if (s1[i1] < s2[i2])
+           return -1;
+       if (s1[i1] > s2[i2])
+           return 1;
+   }
+
+   /* one of the strings is exhausted: decide on the remaining length */
+   if (i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+/*
+ * Like strbcmp(), but compares at most 'count' trailing characters; the
+ * strings are considered equal once 'count' characters have matched.
+ */
+static int
+strbncmp(const char *s1, const char *s2, size_t count) {
+   int i1 = strlen(s1) - 1;
+   int i2 = strlen(s2) - 1;
+   int remaining = count;
+
+   for (; i1 >= 0 && i2 >= 0 && remaining > 0; i1--, i2--, remaining--) {
+       if (s1[i1] < s2[i2])
+           return -1;
+       if (s1[i1] > s2[i2])
+           return 1;
+   }
+
+   if (remaining == 0)
+       return 0;
+   if (i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+
+/*
+ * qsort() comparator for AFFIX entries: first by type, then by 'repl',
+ * compared forwards for type 'p' and backwards (strbcmp) for any other type.
+ */
+static int
+cmpaffix(const void *s1,const void *s2){
+   const AFFIX *lhs = (const AFFIX*)s1;
+   const AFFIX *rhs = (const AFFIX*)s2;
+
+   if (lhs->type != rhs->type)
+       return (lhs->type < rhs->type) ? -1 : 1;
+
+   if (lhs->type == 'p')
+       return(strcmp(lhs->repl,rhs->repl));
+   return(strbcmp(lhs->repl,rhs->repl));
+}
+
+/*
+ * Append a word and its affix flags to Conf->Spell.
+ *
+ * The array grows in steps of 1024*20 entries.  The word is duplicated with
+ * strdup(); up to 10 bytes of 'flag' are copied with strncpy().
+ * Always returns 0; allocation failures are reported with elog(ERROR).
+ *
+ * NOTE(review): strncpy() leaves the destination without a terminating NUL
+ * when 'flag' has 10 or more characters - check against the size of
+ * SPELL.flag, which is declared in spell.h.
+ */
+int 
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell"); 
+   }
+   Conf->Spell[Conf->nspell].word=strdup(word);
+   if ( !Conf->Spell[Conf->nspell].word ) 
+       elog(ERROR,"No memory for AddSpell");
+   strncpy(Conf->Spell[Conf->nspell].flag,flag,10);
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * Load a dictionary file into Conf.
+ *
+ * Each line is read as "word" or "word/flags": the flags are the run of
+ * ASCII letters following the '/', the word is lower-cased and cut at the
+ * first CR or LF, and the pair is stored with AddSpell().
+ * Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportDictionary(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];  
+   FILE *dict;
+
+   if(!(dict=fopen(filename,"r")))return(1);
+   while(fgets(str,sizeof(str),dict)){
+       unsigned char *s;
+       const unsigned char *flag;
+
+           flag = NULL;
+       if((s=strchr(str,'/'))){
+           *s=0;
+           s++;flag=s;
+           /* terminate the flags at the first non-letter */
+           while(*s){
+               if (((*s>='A')&&(*s<='Z'))||((*s>='a')&&(*s<='z')))
+                   s++;
+               else {
+                   *s=0;
+                   break;
+               }
+           }
+       }else{
+           flag="";
+       }
+       strlower(str);
+       /* Dont load words if first letter is not required */
+       /* It allows to optimize loading at  search time   */
+       /* NOTE(review): no such filtering is done below; the loop only strips CR/LF */
+       s=str;
+       while(*s){
+           if(*s=='\r')*s=0;
+           if(*s=='\n')*s=0;
+           s++;
+       }
+       AddSpell(Conf,str,flag);
+   }
+   fclose(dict);
+   return(0);
+}
+
+
+/*
+ * Look 'word' up in the sorted Conf->Spell array.
+ *
+ * The search is limited to the index range recorded in Conf->SpellTree for
+ * the first byte of the word.  An entry matches when its word is equal and
+ * either affixflag is 0 or the entry's flag string contains affixflag.
+ * Returns the matching entry or NULL.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int l,c,r,resc,resl,resr, i;
+
+   i = (int)(*word) & 255;
+   l = Conf->SpellTree.Left[i];
+   r = Conf->SpellTree.Right[i];
+   if (l == -1) return (NULL);
+   /*
+    * Each round tests the middle and both ends of [l, r], then narrows the
+    * range from both sides.
+    */
+   while(l<=r){
+       c = (l + r) >> 1;
+       resc = strcmp(Conf->Spell[c].word, word);
+       if( (resc == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[c]);
+       }
+       resl = strcmp(Conf->Spell[l].word, word);
+       if( (resl == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[l]);
+       }
+       resr = strcmp(Conf->Spell[r].word, word);
+       if( (resr == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[r]);
+       }
+       if(resc < 0){
+           l = c + 1;
+           r--;
+       } else if(resc > 0){
+           r = c - 1;
+           l++;
+       } else {
+           l++;
+           r--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * Append an affix rule to Conf->Affix (grown in steps of 16 entries).
+ *
+ * The mask is stored as an anchored pattern: "mask$" for type 's', "^mask"
+ * for any other type; 'compile' is set so the pattern gets compiled on first
+ * use.  Always returns 0; allocation failures are reported with elog(ERROR).
+ *
+ * NOTE(review): mask, find and repl are copied with sprintf()/strcpy() and
+ * no length check; the field sizes are declared in spell.h - confirm that
+ * callers cannot exceed them.
+ */
+int 
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   if(Conf->naffixes>=Conf->maffixes){
+       if(Conf->maffixes){
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }else{
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL ) 
+           elog(ERROR,"No memory for AddAffix");
+   }
+   if (type=='s') {
+       sprintf(Conf->Affix[Conf->naffixes].mask,"%s$",mask);
+   } else {
+       sprintf(Conf->Affix[Conf->naffixes].mask,"^%s",mask);
+   }
+   Conf->Affix[Conf->naffixes].compile = 1;
+   Conf->Affix[Conf->naffixes].flag=flag;
+   Conf->Affix[Conf->naffixes].type=type;
+   
+   strcpy(Conf->Affix[Conf->naffixes].find,find);
+   strcpy(Conf->Affix[Conf->naffixes].repl,repl);
+   Conf->Affix[Conf->naffixes].replen=strlen(repl);
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy 'src' into 'dist' leaving out every space, '-' and tab, and
+ * NUL-terminate the result.  Returns 'dist'.
+ */
+static char *
+remove_spaces(char *dist,char *src){
+   char *out = dist;
+   char *in;
+
+   for (in = src; *in; in++) {
+       if (*in == ' ' || *in == '-' || *in == '\t')
+           continue;
+       *out++ = *in;
+   }
+   *out = 0;
+   return(dist);
+}
+
+
+/*
+ * Load an affix file into Conf.
+ *
+ * Lines starting with "suffixes" / "prefixes" switch the section, lines
+ * starting with "flag " set the current flag character (after skipping '*'
+ * and spaces).  Inside a section, text after '#' is dropped, the line is
+ * lower-cased and parsed as "mask > find , repl"; spaces, '-' and tabs are
+ * removed from the three parts and the rule is stored with AddAffix().
+ * Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           while(strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* str is reused as scratch space from here on */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               /* only two fields parsed: the second one is the replacement */
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+       
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+       
+   }
+   fclose(affix);
+       
+   return(0);
+}
+
+/*
+ * Sort Conf->Spell by word and record in Conf->SpellTree, for every first
+ * byte, the index range that holds the words starting with that byte.
+ * Left[] is -1 for bytes no word starts with; Right[] is only written for
+ * bytes that occur.
+ */
+void 
+SortDictionary(IspellDict * Conf){
+  int  CurLet = -1, Let;size_t i;
+
+        qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+   for(i = 0; i < 256 ; i++ )
+       Conf->SpellTree.Left[i] = -1;
+
+   for(i = 0; i < Conf->nspell; i++) {
+     Let = (int)(*(Conf->Spell[i].word)) & 255;
+     if (CurLet != Let) {
+       Conf->SpellTree.Left[Let] = i;
+       CurLet = Let;
+     }
+     Conf->SpellTree.Right[Let] = i;
+   }
+}
+
+/*
+ * Sort Conf->Affix with cmpaffix() and build the lookup ranges:
+ * PrefixTree is indexed by the first byte of 'repl' of type 'p' rules,
+ * SuffixTree by the last byte of 'repl' of all other rules (0 when 'repl'
+ * is empty).  Unused slots stay at -1.
+ */
+void 
+SortAffixes(IspellDict * Conf) {
+  int   CurLetP = -1, CurLetS = -1, Let;
+  AFFIX *Affix; size_t i;
+  
+  if (Conf->naffixes > 1)
+    qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
+  for(i = 0; i < 256; i++) {
+      Conf->PrefixTree.Left[i] = Conf->PrefixTree.Right[i] = -1;
+      Conf->SuffixTree.Left[i] = Conf->SuffixTree.Right[i] = -1;
+  }
+
+  for(i = 0; i < Conf->naffixes; i++) {
+    Affix = &(((AFFIX*)Conf->Affix)[i]);
+    if(Affix->type == 'p') {
+      Let = (int)(*(Affix->repl)) & 255;
+      if (CurLetP != Let) {
+   Conf->PrefixTree.Left[Let] = i;
+   CurLetP = Let;
+      }
+      Conf->PrefixTree.Right[Let] = i;
+    } else {
+      Let = (Affix->replen) ? (int)(Affix->repl[Affix->replen-1]) & 255 : 0;
+      if (CurLetS != Let) {
+   Conf->SuffixTree.Left[Let] = i;
+   CurLetS = Let;
+      }
+      Conf->SuffixTree.Right[Let] = i;
+    }
+  }
+}
+
+/*
+ * Try to undo one suffix rule on 'word' (of length 'len').
+ *
+ * *res is set to the strbncmp() result of comparing the word with the
+ * affix's repl text; anything but 0 means the rule does not apply. On a
+ * match the trailing repl is replaced by find, the affix mask is compiled
+ * on first use, and the candidate must both match the mask and be found
+ * by FindWord with the affix's flag.
+ *
+ * Returns a pstrdup'ed copy of the candidate, or NULL.
+ */
+static char *
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf)
+{
+   regmatch_t  subs[2];    /* workaround for apache&linux */
+   char        stem[2 * MAXNORMLEN] = "";
+   int         rc;
+
+   *res = strbncmp(word, Affix->repl, Affix->replen);
+   if (*res != 0)
+       return NULL;
+
+   /* swap the trailing replacement text for the text it stands for */
+   strcpy(stem, word);
+   strcpy(stem + len - Affix->replen, Affix->find);
+
+   if (Affix->compile)
+   {
+       /* the mask is compiled lazily, on first use of this affix */
+       rc = regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB);
+       if (rc)
+       {
+           /* the regcomp error text is not reported; the rule is skipped */
+           regfree(&(Affix->reg));
+           return (NULL);
+       }
+       Affix->compile = 0;
+   }
+
+   rc = regexec(&(Affix->reg), stem, 1, subs, 0);
+   if (rc == 0 && FindWord(Conf, stem, Affix->flag))
+       return pstrdup(stem);
+
+   return NULL;
+}
+
+#define NS 1
+#define MAX_NORM 512
+/*
+ * Try to undo one prefix rule on 'word' and append any resulting normal
+ * forms to the NULL-terminated list 'forms' (write position *cur, at most
+ * MAX_NORM-1 entries).
+ *
+ * Returns the strncmp() result of comparing the word with the affix's repl
+ * text when the rule does not apply, and 0 otherwise (including when the
+ * mask fails to compile). When the candidate matches the mask it is added
+ * if FindWord accepts it, and the first suffix rule filed under key 'pi'
+ * in SuffixTree is additionally tried on it.
+ */
+static int
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur )
+{
+   regmatch_t  match[NS * 2];
+   char        candidate[2 * MAXNORMLEN] = "";
+   AFFIX      *affixes = Conf->Affix;
+   int         cmp, rc, suffix_idx, suffix_res;
+   size_t      candidate_len;
+
+   cmp = strncmp(word, Affix->repl, Affix->replen);
+   if (cmp != 0)
+       return cmp;
+
+   /* replace the leading repl text by find */
+   strcpy(candidate, Affix->find);
+   strcat(candidate, word + Affix->replen);
+
+   if (Affix->compile)
+   {
+       /* the mask is compiled lazily, on first use of this affix */
+       rc = regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB);
+       if (rc)
+       {
+           /* the regcomp error text is not reported; the rule is skipped */
+           regfree(&(Affix->reg));
+           return (0);
+       }
+       Affix->compile = 0;
+   }
+
+   if (regexec(&(Affix->reg), candidate, 1, match, 0) != 0)
+       return 0;
+
+   /* the de-prefixed word itself */
+   if (FindWord(Conf, candidate, Affix->flag) != NULL &&
+       (*cur - forms) < (MAX_NORM - 1))
+   {
+       **cur = pstrdup(candidate);
+       (*cur)++;
+       **cur = NULL;
+   }
+
+   /* the de-prefixed word with a suffix rule undone as well */
+   candidate_len = strlen(candidate);
+   suffix_idx = Conf->SuffixTree.Left[pi];
+   if (suffix_idx >= 0 && (*cur - forms) < (MAX_NORM - 1))
+   {
+       **cur = CheckSuffix(candidate, candidate_len, &affixes[suffix_idx], &suffix_res, Conf);
+       if (**cur)
+       {
+           (*cur)++;
+           **cur = NULL;
+       }
+   }
+   return 0;
+}
+
+
+/*
+ * Collect the normal (dictionary) forms of 'word'.
+ *
+ * The word is lower-cased in place. The result is a palloc'd,
+ * NULL-terminated array of pstrdup'ed strings holding at most MAX_NORM-1
+ * entries: the word itself when FindWord knows it, followed by whatever
+ * CheckPrefix and CheckSuffix produce from the affix rules.
+ *
+ * Returns NULL when the word is empty, longer than MAXNORMLEN, or has no
+ * normal form.
+ */
+char **
+NormalizeWord(IspellDict * Conf, char *word)
+{
+   size_t      len;
+   char      **forms;
+   char      **cur;
+   AFFIX      *Affix;
+   int         ri, pi, ipi, lp, rp, cp, ls, rs;
+   int         lres, rres, cres = 0;
+
+   len = strlen(word);
+
+   /*
+    * An empty word has no last character: indexing word[len-1] would read
+    * before the buffer, and a zero 'pi' would keep the "ipi += pi" loop
+    * below from ever terminating.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return (NULL);
+
+   strlower(word);
+
+   forms = (char **) palloc(MAX_NORM * sizeof(char *));
+   cur = forms;
+   *cur = NULL;
+
+   ri = (int) (*word) & 255;                       /* key for PrefixTree */
+   pi = (int) (word[strlen(word) - 1]) & 255;      /* key for SuffixTree */
+   Affix = (AFFIX *) Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if (FindWord(Conf, word, 0))
+   {
+       *cur = pstrdup(word);
+       cur++;
+       *cur = NULL;
+   }
+
+   /*
+    * Find all other NORMAL forms of the 'word'. The loop body runs for
+    * suffix key 0 (affixes with an empty repl) and then for key 'pi'.
+    */
+   for (ipi = 0; ipi <= pi; ipi += pi)
+   {
+       /* check prefix: probe the run [lp, rp] from both ends and the middle */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp)
+       {
+           cp = (lp + rp) >> 1;
+           cres = 0;
+           if ((cur - forms) < (MAX_NORM - 1))
+               cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+           if ((lp < cp) && ((cur - forms) < (MAX_NORM - 1)))
+               lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+           if ((rp > cp) && ((cur - forms) < (MAX_NORM - 1)))
+               rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+
+           /* narrow the range according to the middle comparison */
+           if (cres < 0)
+           {
+               rp = cp - 1;
+               lp++;
+           }
+           else if (cres > 0)
+           {
+               lp = cp + 1;
+               rp--;
+           }
+           else
+           {
+               lp++;
+               rp--;
+           }
+       }
+
+       /* check suffix: walk the run [ls, rs] inwards from both ends */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       while (ls >= 0 && ls <= rs)
+       {
+           if ((cur - forms) < (MAX_NORM - 1))
+           {
+               *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+               if (*cur)
+               {
+                   cur++;
+                   *cur = NULL;
+               }
+           }
+           if ((rs > ls) && ((cur - forms) < (MAX_NORM - 1)))
+           {
+               *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+               if (*cur)
+               {
+                   cur++;
+                   *cur = NULL;
+               }
+           }
+           ls++;
+           rs--;
+       }                       /* end while */
+
+   }                           /* for ipi */
+
+   if (cur == forms)
+   {
+       pfree(forms);
+       return (NULL);
+   }
+   return (forms);
+}
+
+/*
+ * Release everything owned by the dictionary: the compiled affix regexes,
+ * every dictionary word, and the Affix and Spell arrays themselves. The
+ * structure is zeroed afterwards.
+ */
+void
+FreeIspell(IspellDict *Conf)
+{
+   int         i;
+   AFFIX      *Affix = (AFFIX *) Conf->Affix;
+
+   /* compile == 0 marks an affix whose mask has been regcomp'ed */
+   for (i = 0; i < Conf->naffixes; i++)
+   {
+       if (Affix[i].compile == 0)
+           regfree(&(Affix[i].reg));
+   }
+
+   /* the words belong to the Spell array, so its own count bounds the loop */
+   for (i = 0; i < Conf->nspell; i++)
+       free(Conf->Spell[i].word);
+
+   free(Conf->Affix);
+   free(Conf->Spell);
+   memset((void *) Conf, 0, sizeof(IspellDict));
+   return;
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include 
+#include 
+
+/* One dictionary entry: a word plus the flags attached to it. */
+typedef struct spell_struct {
+        char * word;           /* the word; released with free() by FreeIspell */
+        char flag[10];         /* flags of the word -- presumably affix flag characters, TODO confirm */
+} SPELL;
+
+/* One affix rule, as registered through AddAffix. */
+typedef struct aff_struct {   
+        char flag;             /* flag of the rule; passed to FindWord when checking a candidate */
+        char type;             /* 'p' for a prefix rule; any other value is handled as a suffix */
+        char mask[33];         /* regular expression source the candidate word must match */
+        char find[16];         /* text put in place of repl when the affix is undone */
+        char repl[16];         /* affix text compared against the start/end of the word */
+        regex_t reg;           /* compiled form of mask; valid once compile is 0 */
+        size_t replen;         /* length used for repl in comparisons and splicing */
+        char compile;          /* non-zero while mask still has to be regcomp'ed */
+} AFFIX;
+
+/*
+ * Lookup table indexed by a byte value (0..255): Left[] and Right[] hold
+ * the first and last index of the entries filed under that byte in a
+ * sorted array; -1 is used for "no entry".
+ */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+/* A loaded ispell dictionary: affix rules, words, and their lookup tables. */
+typedef struct {
+   int maffixes;               /* presumably the allocated capacity of Affix -- TODO confirm */
+   int naffixes;               /* number of entries used in Affix */
+   AFFIX * Affix;              /* affix rules; sorted by SortAffixes */
+
+   int nspell;                 /* number of entries used in Spell */
+   int mspell;                 /* presumably the allocated capacity of Spell -- TODO confirm */
+   SPELL   *Spell;             /* dictionary words; sorted by SortDictionary */
+   Tree_struct SpellTree;      /* Spell ranges keyed by the first byte of the word */
+   Tree_struct PrefixTree;     /* prefix ranges in Affix keyed by the first byte of repl */
+   Tree_struct SuffixTree;     /* suffix ranges in Affix keyed by the last byte of repl */
+
+} IspellDict;
+
+char ** NormalizeWord(IspellDict * Conf,char *word);
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#define CS_WAITKEY 0
+#define CS_INKEY   1
+#define CS_WAITEQ  2
+#define CS_WAITVALUE   3
+#define CS_INVALUE 4
+#define CS_IN2VALUE    5
+#define CS_WAITDELIM   6
+#define CS_INESC   7
+#define CS_IN2ESC  8
+
+/*
+ * Copy 'len' bytes starting at 'ptr' into a freshly palloc'd,
+ * NUL-terminated string and unescape it in place: every backslash is
+ * dropped and the character following it is kept literally. A backslash
+ * that is the last character of the slice is simply dropped.
+ */
+static char *
+nstrdup(char *ptr, int len)
+{
+   char       *res = palloc(len + 1),
+              *cptr;
+
+   memcpy(res, ptr, len);
+   res[len] = '\0';
+   cptr = ptr = res;
+   while (*ptr)
+   {
+       if (*ptr == '\\')
+       {
+           ptr++;
+
+           /*
+            * Nothing follows the backslash: stop here, otherwise the
+            * increment below would step past the terminating NUL and the
+            * loop would keep reading beyond the allocation.
+            */
+           if (*ptr == '\0')
+               break;
+       }
+       *cptr = *ptr;
+       ptr++;
+       cptr++;
+   }
+   *cptr = '\0';
+
+   return res;
+}
+
+/*
+ * Parse a dictionary option string of the form
+ *     key = value, key = "quoted value", ...
+ * into a palloc'd array of Map entries stored in *m. The array is zero
+ * filled, so the entry after the last parsed pair has NULL key and value.
+ *
+ * Keys consist of alphabetic characters. A value is either enclosed in
+ * double quotes or runs up to the next whitespace or comma; a backslash
+ * escapes the following character in both forms. Syntax errors are
+ * reported through elog(ERROR).
+ */
+void
+parse_cfgdict(text *in, Map **m)
+{
+   Map        *mptr;
+   char       *ptr = VARDATA(in),
+              *begin = NULL;
+   int         num = 0;        /* int, not char: long inputs may hold >127 commas */
+   int         state = CS_WAITKEY;
+
+   /* every pair but the last is followed by a comma: upper bound on pairs */
+   while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
+   {
+       if (*ptr == ',')
+           num++;
+       ptr++;
+   }
+
+   /* num + 1 pairs at most, plus the zeroed terminating entry */
+   *m = mptr = (Map *) palloc(sizeof(Map) * (num + 2));
+   memset(mptr, 0, sizeof(Map) * (num + 2));
+   ptr = VARDATA(in);
+   while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
+   {
+       /* ctype functions need an unsigned char value, never a negative char */
+       unsigned char c = (unsigned char) *ptr;
+
+       switch (state)
+       {
+           case CS_WAITKEY:
+               if (isalpha(c))
+               {
+                   begin = ptr;
+                   state = CS_INKEY;
+               }
+               else if (!isspace(c))
+                   elog(ERROR, "Syntax error in position %d near '%c'", (int) (ptr - VARDATA(in)), *ptr);
+               break;
+           case CS_INKEY:
+               if (isspace(c))
+               {
+                   mptr->key = nstrdup(begin, ptr - begin);
+                   state = CS_WAITEQ;
+               }
+               else if (*ptr == '=')
+               {
+                   mptr->key = nstrdup(begin, ptr - begin);
+                   state = CS_WAITVALUE;
+               }
+               else if (!isalpha(c))
+                   elog(ERROR, "Syntax error in position %d near '%c'", (int) (ptr - VARDATA(in)), *ptr);
+               break;
+           case CS_WAITEQ:
+               if (*ptr == '=')
+                   state = CS_WAITVALUE;
+               else if (!isspace(c))
+                   elog(ERROR, "Syntax error in position %d near '%c'", (int) (ptr - VARDATA(in)), *ptr);
+               break;
+           case CS_WAITVALUE:
+               if (*ptr == '"')
+               {
+                   begin = ptr + 1;
+                   state = CS_INVALUE;
+               }
+               else if (!isspace(c))
+               {
+                   begin = ptr;
+                   /* a leading backslash already escapes the next character */
+                   state = (*ptr == '\\') ? CS_IN2ESC : CS_IN2VALUE;
+               }
+               break;
+           case CS_INVALUE:    /* inside a quoted value */
+               if (*ptr == '"')
+               {
+                   mptr->value = nstrdup(begin, ptr - begin);
+                   mptr++;
+                   state = CS_WAITDELIM;
+               }
+               else if (*ptr == '\\')
+                   state = CS_INESC;
+               break;
+           case CS_IN2VALUE:   /* inside an unquoted value */
+               if (isspace(c) || *ptr == ',')
+               {
+                   mptr->value = nstrdup(begin, ptr - begin);
+                   mptr++;
+                   state = (*ptr == ',') ? CS_WAITKEY : CS_WAITDELIM;
+               }
+               else if (*ptr == '\\')
+                   /* return to the unquoted state after the escaped character */
+                   state = CS_IN2ESC;
+               break;
+           case CS_WAITDELIM:
+               if (*ptr == ',')
+                   state = CS_WAITKEY;
+               else if (!isspace(c))
+                   elog(ERROR, "Syntax error in position %d near '%c'", (int) (ptr - VARDATA(in)), *ptr);
+               break;
+           case CS_INESC:      /* escaped character inside a quoted value */
+               state = CS_INVALUE;
+               break;
+           case CS_IN2ESC:     /* escaped character inside an unquoted value */
+               state = CS_IN2VALUE;
+               break;
+           default:
+               elog(ERROR, "Bad parser state: %d at position %d near '%c'", state, (int) (ptr - VARDATA(in)), *ptr);
+               break;
+       }
+       ptr++;
+   }
+
+   /* an unquoted value may be ended by the end of the input */
+   if (state == CS_IN2VALUE)
+   {
+       mptr->value = nstrdup(begin, ptr - begin);
+       mptr++;
+   }
+   else if (!(state == CS_WAITDELIM || state == CS_WAITKEY))
+       elog(ERROR, "Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery input is parsed with the text parser
+ * and morphology is applied to it.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored in a flat (plain) form: in the array of nodes
+ * the right child is always the next item and the left child is at
+ * item + item->left.
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR   2
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser.
+ * Nodes are filled in and linked by pushquery().
+ */
+typedef struct NODE
+{
+   int2        weight;         /* weight bit mask of an operand (see get_weight) */
+   int2        type;           /* token type: VAL, OPR, VALTRUE, ... */
+   int4        val;            /* operator character, or CRC32 of the operand text */
+   int2        distance;       /* offset of the operand text in the operand buffer */
+   int2        length;         /* length of the operand text */
+   struct NODE *next;          /* node that was pushed before this one */
+}  NODE;
+
+/* State of the query parser, shared by gettoken_query, makepol and the pushers. */
+typedef struct
+{
+   char       *buf;            /* current position in the query string */
+   int4        state;          /* WAITOPERAND or WAITOPERATOR */
+   int4        count;          /* number of currently open parentheses */
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   int4        lenop;          /* allocated size of op */
+   int4        sumlen;         /* bytes of op in use, terminating NULs included */
+   char       *op;             /* buffer of NUL-separated operand texts */
+   char       *curop;          /* write position inside op */
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional weight suffix of the form ":ABCD" (letters in any
+ * order and case) at 'buf'.
+ *
+ * *weight is set to a bit mask: A -> bit 3, B -> bit 2, C -> bit 1,
+ * D -> bit 0; it is 0 when there is no suffix. Returns a pointer to the
+ * first character after the suffix, or 'buf' itself when it does not
+ * start with ':'.
+ */
+static char*
+get_weight(char *buf, int2 *weight)
+{
+   *weight = 0;
+
+   if (*buf != ':')
+       return buf;
+
+   buf++;
+   while (*buf)
+   {
+       /*
+        * tolower() is only defined for unsigned char values and EOF; a
+        * plain char holding a non-ASCII byte may be negative.
+        */
+       switch (tolower((unsigned char) *buf))
+       {
+           case 'a':
+               *weight |= 1 << 3;
+               break;
+           case 'b':
+               *weight |= 1 << 2;
+               break;
+           case 'c':
+               *weight |= 1 << 1;
+               break;
+           case 'd':
+               *weight |= 1;
+               break;
+           default:
+               return buf;
+       }
+       buf++;
+   }
+
+   return buf;
+}
+
+/*
+ * get token from query string
+ *
+ * Reads the next token at state->buf, advancing it, and returns the token
+ * type:
+ *   OPR   - an operator; *val holds '!', '&' or '|'
+ *   OPEN  - '(' ; state->count is incremented
+ *   CLOSE - ')' ; state->count is decremented (ERR if it drops below 0)
+ *   VAL   - an operand; *strval / *lenval describe its text inside
+ *           state->valstate and *weight its optional weight suffix
+ *   END   - end of string with all parentheses closed
+ *   ERR   - anything else
+ * Blanks between tokens are skipped.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               if (*(state->buf) == '!')
+               {
+                   /* negation keeps us waiting for an operand */
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   /* a weight suffix without an operand in front of it */
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* hand the text over to the tsvector value parser */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       /* continue after the operand and its weight suffix, if any */
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* unbalanced '(' at end of input is an error */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       /* only reached for a blank: skip it */
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Prepend a new node to the parser's list (state->str), which therefore
+ * holds the polish notation in reverse order, and count it in state->num.
+ * Raises an error when 'distance' or 'lenval' reaches MAXSTRPOS or
+ * MAXSTRLEN respectively.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node;
+
+   node = (NODE *) palloc(sizeof(NODE));
+   node->weight = weight;
+   node->type = type;
+   node->val = val;
+
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+
+   node->distance = distance;
+   node->length = lenval;
+
+   /* link in front of the nodes pushed so far */
+   node->next = state->str;
+   state->str = node;
+   state->num += 1;
+}
+
+/*
+ * This function is used for tsquery parsing: the operand is stored
+ * exactly as written. A node carrying the operand's CRC32, offset and
+ * length is pushed, and the text plus a terminating NUL is appended to the
+ * operand buffer, which is doubled until it is large enough.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   int4        used;
+
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   /* offset at which this operand will be written */
+   used = state->curop - state->op;
+
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             used, lenval, weight);
+
+   while (used + lenval + 1 >= state->lenop)
+   {
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+   }
+   /* the buffer may have moved; restore the write position */
+   state->curop = state->op + used;
+
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop[lenval] = '\0';
+   state->curop += lenval + 1;
+   state->sumlen += lenval + 1;
+}
+
+/*
+ * This function is used for morph parsing: the operand text is run
+ * through parsetext_v2() with the configuration state->cfg_id, and every
+ * resulting word is pushed as an operand; the words are joined with '&'.
+ * If parsing yields no words at all, a VALTRUE placeholder is pushed
+ * instead.
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT     prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   for (count = 0; count < prs.curwords; count++)
+   {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree(prs.words[count].word);
+       /* from the second word on, AND it with what was pushed before */
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0);
+   }
+   pfree(prs.words);
+
+   /* XXX */
+   if (prs.curwords == 0)
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens with gettoken_query() until END (or until the ')' that
+ * closes the current nesting level) and emits operands through 'pushval'
+ * and operators through pushquery(). Operators that cannot be emitted yet
+ * wait on a local stack of at most STACKDEPTH entries. Parenthesised
+ * groups are handled by recursion.
+ *
+ * Returns END on success; syntax errors are raised with elog(ERROR).
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];  /* pending operators */
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* the operand completes any pending '&' and '!' */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               /* a '|' met while operators are pending is emitted at once */
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* parse the parenthesised group, then treat it like an operand */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* end of this nesting level: flush what is still pending */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush what is still pending */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/* Context handed to checkcondition_str: the tsvector being searched and the query's operand text. */
+typedef struct
+{
+   WordEntry  *arrb;           /* first entry of the tsvector's word array */
+   WordEntry  *arre;           /* one past its last entry */
+   char       *values;         /* string area of the tsvector (STRPTR) */
+   char       *operand;        /* operand area of the query (GETOPERAND) */
+}  CHKVAL;
+
+/*
+ * Compare a tsvector word with a query operand: the shorter one sorts
+ * first, and strings of equal length are compared with strncmp().
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   char       *word;
+   char       *operand;
+
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   word = &(chkval->values[ptr->pos]);
+   operand = &(chkval->operand[item->distance]);
+   return strncmp(word, operand, item->length);
+}
+
+/*
+ * check weight info
+ *
+ * The position list of a word follows its text at a SHORTALIGN'ed offset:
+ * a uint16 count and then that many WordEntryPos entries. Returns true as
+ * soon as one position's weight is selected by the query item's weight
+ * bit mask.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item)
+{
+   char       *posdata = chkval->values + val->pos + SHORTALIGN(val->len);
+   uint16      len = *((uint16 *) posdata);
+   WordEntryPos *ptr = (WordEntryPos *) (posdata + sizeof(uint16));
+
+   while (len--)
+   {
+       /* position weight N corresponds to bit N of the item's mask */
+       if (item->weight & (1 << ptr->weight))
+           return true;
+       ptr++;
+   }
+   return false;
+}
+
+/*
+ * Is the operand 'val' present in the tsvector described by 'checkval'
+ * (a CHKVAL)? Binary search over the word array using ValCompare. When
+ * the operand carries a weight mask and the matching word has position
+ * info, the weights must match as well (checkclass_str).
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *chk = (CHKVAL *) checkval;
+   WordEntry  *lo = chk->arrb;
+   WordEntry  *hi = chk->arre;
+
+   /* Loop invariant: lo <= val < hi */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = ValCompare(chk, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+       {
+           if (val->weight && mid->haspos)
+               return checkclass_str(chk, mid, val);
+           return true;
+       }
+   }
+
+   return (false);
+}
+
+/*
+ * Evaluate the boolean query tree rooted at 'curitem'.
+ *
+ * Operands (type VAL) are decided by the 'chkcond' callback, which gets
+ * 'checkval' passed through. For an operator the right operand is the
+ * next item and the left one is at curitem + curitem->left. When
+ * 'calcnot' is false every '!' node evaluates to true without looking at
+ * its operand.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   ITEM       *right = curitem + 1;
+
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       if (TS_execute(right, checkval, calcnot, chkcond))
+           return false;
+       return true;
+   }
+
+   if (curitem->val == (int4) '&')
+   {
+       /* short-circuit: the right side is only needed if the left holds */
+       if (!TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+           return false;
+       return TS_execute(right, checkval, calcnot, chkcond);
+   }
+
+   /* |-operator: the right side is only needed if the left fails */
+   if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+       return true;
+   return TS_execute(right, checkval, calcnot, chkcond);
+}
+
+/*
+ * boolean operations
+ *
+ * rexectsq is exectsq with its two arguments swapped.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(1);
+   Datum       second = PG_GETARG_DATUM(0);
+
+   return DirectFunctionCall2(exectsq, first, second);
+}
+
+/*
+ * Does the tsvector (argument 0) satisfy the query (argument 1)?
+ * An empty tsvector or an empty query never matches.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector   *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   bool        result = false;
+
+   if (val->size && query->size)
+   {
+       CHKVAL      chkval;
+
+       chkval.arrb = ARRPTR(val);
+       chkval.arre = chkval.arrb + val->size;
+       chkval.values = STRPTR(val);
+       chkval.operand = GETOPERAND(query);
+
+       /* NOT operators are evaluated for real here (calcnot = true) */
+       result = TS_execute(GETQUERY(query), &chkval, true, checkcondition_str);
+   }
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * Walk the polish-notation array starting at ptr[*pos] and fill in the
+ * 'left' field of every item of that subtree: 0 for operands, 1 for '!',
+ * and for binary operators the distance to the left operand, which
+ * starts right after the right operand's subtree. On return *pos is the
+ * index just past the subtree.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+   ITEM       *item = &ptr[*pos];
+   int4        start = *pos;
+
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+
+   (*pos)++;
+
+   if (item->type == VAL || item->type == VALTRUE)
+   {
+       item->left = 0;
+       return;
+   }
+
+   if (item->val == (int4) '!')
+   {
+       item->left = 1;
+       findoprnd(ptr, pos);
+       return;
+   }
+
+   /* binary operator: right operand first, then the left one */
+   findoprnd(ptr, pos);
+   item->left = *pos - start;
+   findoprnd(ptr, pos);
+}
+
+
+/*
+ * input
+ *
+ * Parse the query string 'buf' into a palloc'd QUERYTYPE. Operands are
+ * stored through the 'pushval' callback (pushval_asis or pushval_morph);
+ * 'cfg_id' is kept in the parser state for the callback's use.
+ * An empty query raises an error.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   /* the list head is the node pushed last, so the array starts with it */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * Input function of tsquery: operands are taken as written, without
+ * morphology (pushval_asis, configuration id 0).
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   char       *input = (char *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *parsed = queryin(input, pushval_asis, 0);
+
+   PG_RETURN_POINTER(parsed);
+}
+
+/*
+ * out function
+ */
+/* Working state of infix(): the tree cursor and a growable output buffer. */
+typedef struct
+{
+   ITEM       *curpol;         /* next query item to print */
+   char       *buf;            /* start of the output buffer (palloc'd) */
+   char       *cur;            /* write position inside buf */
+   char       *op;             /* operand text area of the query */
+   int4        buflen;         /* allocated size of buf */
+}  INFIX;
+
+/*
+ * Make sure the output buffer of 'inf' (an INFIX pointer) has room for
+ * 'addsize' more bytes plus a terminating NUL, doubling it with repalloc
+ * as often as needed and re-basing the write position.
+ */
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the subtree at in->curpol into in's buffer and advances
+ * in->curpol past it. 'first' suppresses the parentheses that are
+ * otherwise put around an '|' expression.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       /* operand: quoted text, optionally followed by ":" and weight letters */
+       char       *op = in->op + in->curpol->distance;
+
+       /* worst case: every char escaped, 2 quotes, ':' and 4 weight letters */
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       /* negation: parenthesise the operand when it is itself an operator */
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       /* binary operator */
+       int4        op = in->curpol->val;
+       INFIX       nrm;
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       /* the right operand comes first in the array: print it into a side buffer */
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function of tsquery: the query printed in infix form as a
+ * palloc'd C string; an empty query gives an empty string.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       out;
+
+   if (query->size == 0)
+   {
+       char       *empty = palloc(1);
+
+       empty[0] = '\0';
+       PG_RETURN_POINTER(empty);
+   }
+
+   out.buflen = 32;
+   out.buf = (char *) palloc(sizeof(char) * out.buflen);
+   out.cur = out.buf;
+   out.cur[0] = '\0';
+   out.curpol = GETQUERY(query);
+   out.op = GETOPERAND(query);
+
+   infix(&out, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(out.buf);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * Returns the query as text after clean_NOT_v2() has been applied to it:
+ * an empty text for an empty query, "T" when nothing is left, and the
+ * infix form of the remaining tree otherwise.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   text       *res;
+   ITEM       *cleaned;
+   int4        len;
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       PG_RETURN_POINTER(res);
+   }
+
+   cleaned = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (cleaned)
+   {
+       INFIX       out;
+
+       out.curpol = cleaned;
+       out.buflen = 32;
+       out.buf = (char *) palloc(sizeof(char) * out.buflen);
+       out.cur = out.buf;
+       *(out.cur) = '\0';
+       out.op = GETOPERAND(query);
+       infix(&out, true);
+
+       /* copy the printed text without its terminating NUL */
+       res = (text *) palloc(out.cur - out.buf + VARHDRSZ);
+       VARATT_SIZEP(res) = out.cur - out.buf + VARHDRSZ;
+       strncpy(VARDATA(res), out.buf, out.cur - out.buf);
+       pfree(cleaned);
+   }
+   else
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * to_tsquery(cfg_id, text): parse the text into a query using the
+ * morphology of the given configuration, then pass the tree through
+ * clean_fakeval_v2(). When that leaves nothing, an empty query is
+ * returned.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS)
+{
+   text       *in = PG_GETARG_TEXT_P(1);
+   char       *str = text2char(in);
+   QUERYTYPE  *query;
+   ITEM       *res;
+   int4        len;
+
+   PG_FREE_IF_COPY(in, 1);
+
+   query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+   res = clean_fakeval_v2(GETQUERY(query), &len);
+
+   if (res)
+   {
+       /* overwrite the item array with the cleaned tree */
+       memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM));
+       pfree(res);
+   }
+   else
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+   }
+
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * to_tsquery(cfg_name, text): look up the configuration by name and
+ * delegate to to_tsquery(cfg_id, text).
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS)
+{
+   text       *name = PG_GETARG_TEXT_P(0);
+   Datum       res = DirectFunctionCall2(
+                                         to_tsquery,
+                                         Int32GetDatum(name2id_cfg(name)),
+                                         PG_GETARG_DATUM(1)
+   );
+
+   /*
+    * 'name' was fetched from argument 0, so that is the argument it must
+    * be compared with; naming another argument would pfree it even when
+    * it is not a detoasted copy.
+    */
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsquery(text): delegate to to_tsquery(cfg_id, text) with the
+ * configuration id returned by get_currcfg().
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS)
+{
+   Datum       cfg = Int32GetDatum(get_currcfg());
+   Datum       result = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));
+
+   PG_RETURN_DATUM(result);
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * item in polish notation with back link
+ * to left operand
+ *
+ * type   - one of the item kinds defined below (VAL, OPR, ...)
+ * left   - for a binary operator, offset (in items) from the operator to
+ *          its left operand; the right operand is the next item
+ * val    - for an operator item, the operator character ('!', '&', '|')
+ * length/distance - length of the operand string and its offset inside
+ *          the operand area that follows the item array
+ */
+typedef struct ITEM
+{
+   int8        type;
+   int8        weight;
+   int2        left;
+   int4        val;
+   /* user-friendly value, must correlate with WordEntry */
+   uint32  
+       unused:1,
+       length:11,
+       distance:20;
+}  ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  QUERYTYPE;
+
+/*
+ * Layout helpers.  Arguments and results are fully parenthesized so the
+ * macros can be combined with any expression (e.g. GETQUERY(q)[i] or
+ * COMPUTESIZE(a + b, c)) without precedence surprises.
+ */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+/* item / token kinds */
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+/*
+ * Evaluate the query starting at curitem; chkcond is called with checkval
+ * and an ITEM to decide whether that operand matches.
+ * NOTE(review): calcnot presumably selects whether '!' is really evaluated;
+ * confirm against the definition of TS_execute.
+ */
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevance ranking
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+/* default per-weight-class multipliers, indexed by WordEntryPos.weight */
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+/* multiplier for one position; relies on a local array named "w" in scope */
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+/* normalization method used when the caller does not pass one */
+#define DEF_NORM_METHOD    0
+
+/*
+ * Weight of a collocation of two words that are w positions apart.
+ * Distances above 100 get a fixed, practically zero weight; otherwise
+ * the weight falls off with the exponential of the distance.
+ */
+static float4 word_distance ( int4 w ) {
+   return ( w>100 ) ? 1e-30 : 1.0/(1.005+0.05*exp( ((float4)w)/1.5-2) );
+}
+
+/*
+ * Length of a tsvector for normalization purposes: the sum over all
+ * entries of their position count, where an entry without positions
+ * counts as one.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry   *cur, *stop=(WordEntry*)STRPTR(t);
+   int total = 0;
+
+   for(cur=ARRPTR(t); cur < stop; cur++) {
+       int npos = POSDATALEN(t, cur);
+
+       total += ( npos == 0 ) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Compare a tsvector entry with a query operand: shorter strings sort
+ * first, strings of equal length are ordered by strncmp().
+ * eval/qval are the bases of the tsvector and query string areas.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+        if (ptr->len != item->length)
+                return (ptr->len > item->length) ? 1 : -1;
+
+        return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary search of the tsvector's entry array for the query operand
+ * "item", using the ordering of WordECompareITEM().
+ * Returns the matching entry, or NULL when the word is not in t.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+        WordEntry  *lo = ARRPTR(t);
+        WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+        /* search the half-open range [lo, hi) */
+        while (lo < hi)
+        {
+                WordEntry  *mid = lo + (hi - lo) / 2;
+                int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+                if (cmp < 0)
+                        lo = mid + 1;
+                else if (cmp > 0)
+                        hi = mid;
+                else
+                        return mid;
+        }
+
+        return NULL;
+}
+
+/*
+ * Stand-in position data for entries stored without positions.  Users
+ * overwrite the first element with a uint16 count (lengthof(POSNULL)-1)
+ * so the array looks like regular position data: a count followed by
+ * that many WordEntryPos elements.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank of document t for a query whose root operator is '&'.
+ *
+ * For every pair of query words found in the document and every pair of
+ * their positions, a weight sqrt(w1 * w2 * word_distance(dist)) is
+ * computed and folded into the result as 1 - (1-res)*(1-curw).
+ * w is the weight table used by wpos().
+ * Returns -1.0 when no pair contributed.
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   memset(pos,0,sizeof(uint16*) * q->size);
+   /* make POSNULL look like position data: count, then the elements */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       /* pair the current word with every earlier word that was found */
+       for( k=0; k<i; k++ ) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   /* equal positions only count when one side is the POSNULL stand-in */
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw; 
+                       if ( !dist ) dist=MAXENTRYPOS;  
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res; 
+}
+
+/*
+ * Rank of document t for a query whose root is not '&'.
+ *
+ * Every position of every query word found in the document contributes
+ * its weight (wpos), folded into the result as 1 - (1-res)*(1-weight).
+ * Returns -1.0 when no query word was found.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* make POSNULL look like position data: count, then the elements */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Rank document t against query q with weight table w.
+ *
+ * Uses calc_rank_and() when the root item is the '&' operator and
+ * calc_rank_or() otherwise; a negative (nothing matched) result is
+ * replaced by 1e-20.  The result is then normalized:
+ *   method 0 - no normalization
+ *   method 1 - divide by log(document length)
+ *   method 2 - divide by document length
+ * Any other method raises an error.  Empty document or query gives 0.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   if ( res < 0 )
+       res = 1e-20;
+
+   switch(method) {
+       case 0: break;
+       case 1: {
+           double  lg = log((float)cnt_length(t));
+
+           /* log(1) is 0: leave a one-position document un-normalized
+            * instead of dividing by zero */
+           if ( lg > 0 )
+               res /= lg;
+           break;
+       }
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   return res;
+}
+
+/*
+ * rank(weights float4[], tsvector, tsquery [, method int4])
+ *
+ * The weight array must be one-dimensional and hold at least
+ * lengthof(weights) elements.  A negative element selects the built-in
+ * default for that slot; an element above 1.0 is an error.
+ * The optional fourth argument is the normalization method passed to
+ * calc_rank().
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 ) 
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+        elog(ERROR,"Array of weight is too short");
+
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 ) 
+           elog(ERROR,"Weight out of range");
+   } 
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method); 
+       
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank(tsvector, tsquery [, method int4])
+ *
+ * Ranks with the built-in default weight table; the optional third
+ * argument selects the normalization method.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *qry = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int norm = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   float score = calc_rank(weights, vec, qry, norm);
+
+   PG_FREE_IF_COPY(vec, 0);
+   PG_FREE_IF_COPY(qry, 1);
+   PG_RETURN_FLOAT4(score);
+}
+
+
+/* one occurrence of a query operand in the document */
+typedef struct {
+   ITEM    *item;  /* query operand that matched */
+   int32   pos;    /* position of the occurrence in the document */
+} DocRepresentation;
+
+/*
+ * qsort comparator ordering DocRepresentation by position.
+ * Equal positions compare as 0, as qsort requires of its comparator
+ * (cmp(a,b) and cmp(b,a) must agree).
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+/* a window of the document representation handed to TS_execute() */
+typedef struct {
+   DocRepresentation *doc;
+   int len;
+}  ChkDocR;
+
+/*
+ * TS_execute() callback: true when the query operand val occurs
+ * anywhere inside the window described by checkval (a ChkDocR).
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *window = (ChkDocR*)checkval;
+   int     idx;
+
+   for(idx=0; idx < window->len; idx++) {
+       if ( window->doc[idx].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next "cover": a stretch of the document representation that
+ * satisfies the query.
+ *
+ * doc/len - document representation sorted by position
+ * pos     - in/out index into doc where the search starts; advanced past
+ *           the start of the cover that was found
+ * p, q    - out: first and last document position of the cover; *q also
+ *           carries the end of the previous cover in
+ *
+ * Returns true when a cover was found, false when none is left.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* forward pass: for each operand take its first occurrence at or
+    * after *pos; the largest such position becomes the right edge *q */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               } 
+               break;
+           } 
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   } 
+
+   /* backward pass: from the right edge look back for each operand; the
+    * smallest position found becomes the left edge *p */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+ 
+   if ( *p<=*q ) {
+       /* check the query against the candidate window only */
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { 
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ 
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q); 
+   }
+ 
+   return false;
+}
+
+/*
+ * Build the document representation used by Cover(): one element per
+ * occurrence in txt of each operand of the query, sorted by position.
+ * Entries stored without positions contribute the POSNULL stand-in.
+ *
+ * *doclen receives the number of elements.  Returns a palloc'd array,
+ * or NULL (with *doclen = 0) when no operand occurs in the document.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   /* make POSNULL look like position data: count, then the elements */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until the new positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+   
+   if ( cur>0 ) {
+       if ( cur>1 ) 
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+   
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd(K int4, tsvector, tsquery [, method int4])
+ *
+ * Cover-based rank: every cover found by Cover() adds 1.0 when it spans
+ * at most K positions and K/span otherwise.  K <= 0 selects the default
+ * of 4.  The sum is normalized like in calc_rank():
+ *   method 0 - none, 1 - divide by log(length), 2 - divide by length.
+ * Returns 0 when no query operand occurs in the document.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len,cur;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;    
+   while( Cover(doc, len, query, &cur, &p, &q) ) 
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   switch(method) {
+       case 0: break;
+       case 1: {
+           double  lg = log((float)cnt_length(txt));
+
+           /* log(1) is 0: leave a one-position document un-normalized
+            * instead of dividing by zero */
+           if ( lg > 0 )
+               res /= lg;
+           break;
+       }
+       case 2: res /= (float)cnt_length(txt); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd(tsvector, tsquery [, method int4])
+ *
+ * Calls rank_cd() with K = -1 (which makes it use its default) and with
+ * DEF_NORM_METHOD when no method argument was supplied.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum   method = ( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD);
+   Datum   score = DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   );
+
+   PG_RETURN_DATUM( score );
+}
+
+/**************debug*************/
+
+/* one word occurrence of the document, used by get_covers() */
+typedef struct {
+   char    *w;     /* word text (not NUL-terminated) */
+   int2    len;    /* length of w */
+   int2    pos;    /* position in the document */
+   int2    start;  /* number of the cover starting here, 0 if none */
+   int2    finish; /* number of the cover ending here, 0 if none */
+} DocWord;
+
+/*
+ * qsort comparator ordering DocWord by position.
+ * Equal positions compare as 0, as qsort requires of its comparator.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers(tsvector, tsquery) - debugging aid
+ *
+ * Returns the words of the document in position order as text, with
+ * "{N " written before the first word and "}N " after the last word of
+ * the N-th cover found by Cover().  Every entry of the tsvector must
+ * carry position information, otherwise an error is raised.
+ * Returns an empty text when no query operand occurs in the document.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences in the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+        dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size
+    * (word plus a separating space for each occurrence) */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark the first and last word of every cover */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* test the bound before dereferencing dwptr */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/*
+ * Node of the pointer-based query tree built by maketree().
+ * valnode points at the ITEM the node was built from; for the unary '!'
+ * operator only right is set, for a value both children are NULL.
+ */
+typedef struct NODE
+{
+   struct NODE *left;
+   struct NODE *right;
+   ITEM       *valnode;
+}  NODE;
+
+/*
+ * Build a pointer-based tree from the flat (polish notation) item array.
+ * The right operand of an operator is the item that follows it; the left
+ * operand of a binary operator is in->left items further on.  The unary
+ * '!' operator gets a right child only.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *n = (NODE *) palloc(sizeof(NODE));
+
+   n->valnode = in;
+   n->left = NULL;
+   n->right = NULL;
+
+   /* anything that is not an operator is a leaf */
+   if (in->type != OPR)
+       return n;
+
+   n->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       n->left = maketree(in + in->left);
+   return n;
+}
+
+/* growing output buffer used while flattening a tree */
+typedef struct
+{
+   ITEM       *ptr;    /* item array (palloc'd) */
+   int4        len;    /* allocated number of items */
+   int4        cur;    /* number of items written so far */
+}  PLAINTREE;
+
+/*
+ * Append the subtree rooted at node to state in polish notation
+ * (operator, right operand, left operand) and free the node.
+ * The operator's left field is rewritten to the offset of its left
+ * operand in the new array.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   /* double the buffer when it is full */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+   memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM));
+   if (node->valnode->type == VAL)
+       state->cur++;
+   else if (node->valnode->val == (int4) '!')
+   {
+       /* unary operator: its only operand directly follows it */
+       state->ptr[state->cur].left = 1;
+       state->cur++;
+       plainnode(state, node->right);
+   }
+   else
+   {
+       /* remember the operator's index: state->ptr may move on repalloc */
+       int4        cur = state->cur;
+
+       state->cur++;
+       plainnode(state, node->right);
+       /* the left operand starts right after the whole right subtree */
+       state->ptr[cur].left = state->cur - cur;
+       plainnode(state, node->left);
+   }
+   pfree(node);
+}
+
+/*
+ * Flatten a pointer-based tree back into a polish-notation item array.
+ * *len receives the number of items.  Returns a palloc'd array, or NULL
+ * (with *len = 0) when root is NULL or is neither a value nor an
+ * operator.  The tree nodes are freed by plainnode().
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   st;
+
+   st.ptr = NULL;
+   st.len = 16;
+   st.cur = 0;
+
+   if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       st.ptr = (ITEM *) palloc(st.len * sizeof(ITEM));
+       plainnode(&st, root);
+   }
+
+   *len = st.cur;
+   return st.ptr;
+}
+
+/*
+ * Free a tree node together with everything below it; NULL is accepted.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive call itself ignores missing children */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Remove every '!' operator from the tree.
+ * The result is useful for debugging and is the form of the query used
+ * for index searches, where operator '!' is treated as always TRUE.
+ *
+ * Returns the cleaned subtree, or NULL when the whole subtree reduces to
+ * "always TRUE".  Nodes that are dropped are freed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* TRUE | x is TRUE: the whole node goes away if either side does */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       /* TRUE & x is x: replace the node by its surviving side */
+       NODE       *res = node;
+
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a copy of the query at ptr with all '!' operators removed
+ * (see clean_NOT_intree).  *len receives the number of items; the
+ * result is NULL when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *cleaned = clean_NOT_intree(maketree(ptr));
+
+   return plaintree(cleaned, len);
+}
+
+/* truth value a removed subtree is known to have */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Remove from the query tree the values that are always present in a
+ * text (stop words, item type VALTRUE) and simplify the operators
+ * around them.
+ *
+ * Returns the cleaned subtree.  When the subtree collapses to a constant
+ * NULL is returned and *result is set to V_TRUE or V_FALSE; when a real
+ * subtree is returned *result is left untouched, so the caller must
+ * initialize it to V_UNKNOWN.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* negation of a constant is the opposite constant */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       /* TRUE | x is TRUE; FALSE | x is x */
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       /* FALSE & x is FALSE; TRUE & x is x */
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   /* reached only for '!' whose operand survived the cleanup */
+   return node;
+}
+
+/*
+ * Return a copy of the query at ptr without stop-word placeholders
+ * (see clean_fakeval_intree).  *len receives the number of items.
+ * When the whole query collapses to a constant a NOTICE is emitted and
+ * NULL is returned with *len = 0.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        status = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &status);
+
+   if (status == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/* qsort/bsearch comparator: order map entries by key with strcmp() */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   SNMapEntry *lhs = (SNMapEntry*)a;
+   SNMapEntry *rhs = (SNMapEntry*)b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add key -> value to the map.  The key is copied with strdup() and the
+ * entry array, which lives in malloc'd memory, is doubled when full
+ * (16 entries to start with).  The array is re-sorted after every
+ * insertion so that findSNMap() can use bsearch().
+ * Raises an error when memory cannot be allocated.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if (map->len>=map->reallen) {
+       int newlen = (map->reallen) ? 2*map->reallen : 16;
+       SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newlen);
+
+       if ( !grown )
+           elog(ERROR, "No memory");
+       map->reallen=newlen;
+       map->list=grown;
+   }
+
+   slot = &(map->list[ map->len ]);
+   slot->key = strdup(key);
+   if ( ! slot->key )
+       elog(ERROR, "No memory");
+   slot->value=value;
+   map->len++;
+
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/* Same as addSNMap() with the key given as a text value. */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey=text2char( key );
+
+   addSNMap(map, ckey, value);
+   /* addSNMap keeps its own copy of the key */
+   pfree(ckey);
+}
+
+/*
+ * Look up key in the map.  Returns the stored value, or 0 when the map
+ * is empty or the key is not present.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe = {key, 0};
+   SNMapEntry *hit;
+
+   if ( !map->list || map->len==0 )
+       return 0;   
+
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( !hit )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Same as findSNMap() with the key given as a text value.
+ * The result is kept in an Oid so the value is never squeezed through a
+ * signed int on its way back to the caller.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release all keys and the entry array, then reset the map to the empty
+ * (all-zero) state so it can be reused.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       int i;
+
+       for(i=0; i<map->len; i++) {
+           if ( map->list[i].key )
+               free(map->list[i].key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* one key -> Oid association; key is owned by the map */
+typedef struct {
+   char    *key;
+   Oid value;
+} SNMapEntry;
+
+/*
+ * Map from string to Oid, stored as an array sorted by key.
+ * An all-zero SNMap is a valid empty map (freeSNMap leaves it that way).
+ */
+typedef struct {
+   int len;            /* number of entries in use */
+   int reallen;        /* number of entries allocated */
+   SNMapEntry  *list;  /* entry array, NULL while empty */
+} SNMap;
+
+/* add an entry; the key is copied */
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+/* look up a key; returns 0 when it is not present */
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+/* release everything and reset the map to empty */
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a stemmer environment with S_size string slots, I_size
+ * integers and B_size boolean symbols (each may be 0).
+ *
+ * Returns the new environment, or NULL when one of the calloc() calls
+ * fails; in that case everything allocated so far is released again.
+ * The *_size fields are only set once the matching array exists, so
+ * SN_close_env() can be used on a partially built environment.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    SN_close_env(z);
+    return NULL;
+}
+
+/*
+ * Release an environment created by SN_create_env(): the string slots,
+ * the integer and boolean arrays, the current string and finally the
+ * environment itself.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    if (z->S_size)
+    {
+        int i = 0;
+        while (i < z->S_size)
+        {   lose_s(z->S[i]);
+            i++;
+        }
+        free(z->S);
+    }
+    if (z->I_size) free(z->I);
+    if (z->B_size) free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Make the size symbols at s the current string of the environment:
+ * the range 0..z->l of the old string is replaced and z->c is reset
+ * to 0.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/*
+ * Stemmer environment.
+ * p is the current string (created with create_s).  S, I and B are the
+ * arrays allocated by SN_create_env, with S_size, I_size and B_size
+ * elements respectively.
+ * NOTE(review): c, a, l, lb, bra and ket look like cursor/limit/bracket
+ * offsets into p used by the generated stemmers; confirm against the
+ * Snowball runtime before relying on that.
+ */
+struct SN_env {
+    symbol * p;
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;
+    symbol * * S;
+    int * I;
+    symbol * B;
+};
+
+/* allocate / release an environment */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+/* install the size symbols at s as the current string */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+/* Forward declarations: the public entry point, the static routines it is built from, and the environment helpers. */
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+/*
+ * a_0: the single prefix "gener", looked up with find_among() at the start
+ * of r_mark_regions().  A match bypasses the two scans that otherwise
+ * position the first region mark.
+ *
+ * Each entry follows struct among in header.h:
+ * { s_size, s, substring_i, result, function }.
+ */
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+/*
+ * a_1: suffixes examined by r_Step_1a() with find_among_b().
+ * Result codes select the case in that routine's switch:
+ *   1  sses      -> replaced by "ss"
+ *   2  ied, ies  -> replaced by "ie" or "i"
+ *   3  s         -> deleted, subject to a vowel test
+ *  -1  ss, us    -> no case applies, the word is left as it is
+ */
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+/*
+ * a_2: endings tested by r_Step_1b() on what is left after an ed / ing
+ * type suffix has been deleted.  Result codes:
+ *   1  at, bl, iz                 -> an 'e' is inserted
+ *   2  bb dd ff gg mm nn pp rr tt -> the last letter is deleted
+ *   3  the empty string (entry 0) -> an 'e' is inserted when the cursor is
+ *                                    at mark I[0] and r_shortv() succeeds
+ * Entry 0 has length 0 and a null string, which is why there is no s_2_0.
+ */
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+/*
+ * a_3: suffixes examined by r_Step_1b() with find_among_b().  Result codes:
+ *   1  eed, eedly           -> replaced by "ee" when r_R1() succeeds
+ *   2  ed, ing, edly, ingly -> deleted, subject to a vowel test, followed
+ *                              by a second lookup in a_2
+ */
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+/*
+ * a_4: suffixes examined by r_Step_2() with find_among_b().  The result
+ * code of the matched entry selects the case in that routine's switch:
+ *   1 tional -> tion       2 enci -> ence          3 anci -> ance
+ *   4 abli -> able         5 entli -> ent          6 izer, ization -> ize
+ *   7 ational, ation, ator -> ate                  8 alism, aliti, alli -> al
+ *   9 fulness -> ful      10 ousli, ousness -> ous
+ *  11 iveness, iviti -> ive                       12 biliti, bli -> ble
+ *  13 ogi -> og (needs a preceding 'l')           14 fulli -> ful
+ *  15 lessli -> less      16 li deleted (needs a preceding g_valid_LI letter)
+ */
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+/*
+ * a_5: suffixes examined by r_Step_3() with find_among_b().  Result codes:
+ *   1 tional -> tion     2 ational -> ate     3 alize -> al
+ *   4 icate, iciti, ical -> ic
+ *   5 ful, ness -> deleted
+ *   6 ative -> deleted when r_R2() succeeds
+ */
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+/*
+ * a_6: suffixes examined by r_Step_4() with find_among_b().  Result 1
+ * means the suffix is simply deleted; result 2 (only "ion") means it is
+ * deleted when the letter before it is 's' or 't'.  r_Step_4() applies
+ * either action only after r_R2() has succeeded.
+ */
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+/* a_7: the final letters handled by r_Step_5(): 'e' (result 1) and 'l' (result 2). */
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+/*
+ * a_8: words looked up by r_exception2() with find_among_b().  Every entry
+ * carries result -1; r_exception2() only reports whether one of them
+ * matched all the way back to the backward limit.
+ */
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+/*
+ * a_9: words looked up by r_exception1() with find_among().  A positive
+ * result selects the replacement written by that routine's switch (for
+ * example skis -> ski, dying -> die, early -> earli).  Entries with result
+ * -1 (andes, atlas, bias, cosmos, howe, news, sky) match no case and are
+ * therefore left unchanged.
+ */
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+/* Character groupings passed to in_grouping() / out_grouping() and their _b forms.  NOTE(review): presumably bit sets indexed from the `min` argument, lowest bit first -- confirm against the runtime. */
+static unsigned char g_v[] = { 17, 65, 16, 1 }; /* used with range 97..121; under the assumption above: a e i o u y */
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; /* used with range 89..121; under the assumption above: Y a e i o u w x y */
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 }; /* used with range 99..116; under the assumption above: c d e g h k m n r t */
+/* Literal strings for the routines that follow: operands of eq_s() / eq_s_b() tests and replacement texts for slice_from_s() / insert_s().  They have no terminator; every call passes the length explicitly. */
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+/*
+ * r_prelude: mark certain 'y' letters by rewriting them as 'Y'.
+ *
+ * Clears the Y_found flag B[0].  First block: if the symbol at the cursor
+ * is 'y' and the g_v test that follows it succeeds, that 'y' becomes 'Y'.
+ * Second block: repeatedly searches forward for a position where the g_v
+ * test succeeds and is directly followed by 'y', and rewrites that 'y' as
+ * 'Y'.  Every rewrite sets B[0] to 1.  Each block restores the cursor it
+ * started with.  Always returns 1.
+ *
+ * NOTE(review): this reading assumes in_grouping() and eq_s() step over
+ * the symbol(s) they match -- their definitions are not shown here.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2; /* reached the limit: no further match */
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1: /* no goto in this routine targets this label */
+        z->c = c;
+    }
+    return 1;
+}
+/*
+ * r_mark_regions: compute the two region marks I[0] (p1) and I[1] (p2).
+ *
+ * Both marks start at the limit z->l.  The cursor is then moved either past
+ * the prefix in a_0, or past the first g_v member and then past the first
+ * non-member after it; that position becomes I[0].  The same two scans are
+ * repeated from there to obtain I[1].  If a scan reaches the limit, the
+ * marks not yet assigned keep the value z->l.  The cursor is restored
+ * before returning.  Always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+/*
+ * r_shortv: backward test used by r_Step_1b() and r_Step_5().
+ *
+ * Succeeds (returns 1) when, working backward from the cursor, either
+ *   - a non-member of g_v_WXY, a member of g_v and a non-member of g_v
+ *     are found in that order, or
+ *   - a non-member of g_v and a member of g_v are found and the cursor is
+ *     then at the backward limit z->lb.
+ * Returns 0 otherwise.  The cursor is not restored here; both callers
+ * restore it themselves.
+ */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m; /* first alternative failed: retry from the saved position */
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* R1 test: returns 1 when the cursor lies at or beyond mark I[0], otherwise 0. */
+static int r_R1(struct SN_env * z) {
+    return (z->c >= z->I[0]) ? 1 : 0;
+}
+
+/* R2 test: returns 1 when the cursor lies at or beyond mark I[1], otherwise 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->c >= z->I[1]) ? 1 : 0;
+}
+/*
+ * r_Step_1a: handle the suffixes listed in a_1, working backward.
+ *
+ *   result 1: the suffix is replaced by "ss".
+ *   result 2: the suffix is replaced by "ie" when exactly one symbol lies
+ *             between it and the backward limit, otherwise by "i".
+ *   result 3: the suffix is deleted, provided that after stepping back one
+ *             symbol a g_v member is still found further back.
+ *   other   : no case applies and nothing is changed.
+ *
+ * Returns 0 when no table entry matches or a condition fails, else 1.
+ */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+/*
+ * r_Step_1b: handle the suffixes listed in a_3, working backward.
+ *
+ *   result 1: replaced by "ee" when r_R1() succeeds.
+ *   result 2: a g_v member must exist somewhere before the suffix; the
+ *             suffix is then deleted and the new ending is looked up in
+ *             a_2, whose result decides the follow-up:
+ *               1: insert 'e' at the cursor;
+ *               2: delete the symbol before the cursor;
+ *               3: insert 'e' if the cursor is at mark I[0] and
+ *                  r_shortv() succeeds.
+ *
+ * Returns 0 when no table entry matches or a condition fails, else 1.
+ */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+/*
+ * r_Step_1c: replace a final 'y' or 'Y' by 'i'.
+ *
+ * Working backward, the symbol before the cursor must be 'y' or 'Y', the
+ * g_v non-member test before it must succeed, and the cursor must not then
+ * be at the backward limit.  Returns 1 after the replacement, 0 when any
+ * of these conditions fails.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+/*
+ * r_Step_2: rewrite the suffixes listed in a_4, working backward.
+ *
+ * The matched suffix must satisfy r_R1().  The table's result code picks
+ * the replacement string; two cases carry an extra condition: case 13
+ * needs an 'l' before the suffix, and case 16 deletes the suffix only when
+ * the g_valid_LI test before it succeeds.
+ *
+ * Returns 0 when no table entry matches or a condition fails, else 1.
+ */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+/*
+ * r_Step_3: rewrite the suffixes listed in a_5, working backward.
+ *
+ * The matched suffix must satisfy r_R1().  Results 1-4 replace it by
+ * "tion", "ate", "al" or "ic"; result 5 deletes it; result 6 deletes it
+ * only when r_R2() also succeeds.
+ *
+ * Returns 0 when no table entry matches or a condition fails, else 1.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+/*
+ * r_Step_4: delete the suffixes listed in a_6, working backward.
+ *
+ * The matched suffix must satisfy r_R2().  Result 1 deletes it outright;
+ * result 2 deletes it only when the symbol before it is 's' or 't'.
+ *
+ * Returns 0 when no table entry matches or a condition fails, else 1.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+/*
+ * r_Step_5: handle a final 'e' or 'l' (table a_7), working backward.
+ *
+ *   result 1 ('e'): deleted when r_R2() succeeds, or when r_R1() succeeds
+ *                   and r_shortv() fails.
+ *   result 2 ('l'): deleted when r_R2() succeeds and the symbol before it
+ *                   is also 'l'.
+ *
+ * Returns 0 when no table entry matches or a condition fails, else 1.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * r_exception2: backward lookup of the words in a_8.
+ *
+ * Marks ket at the cursor, searches a_8 with find_among_b() and, on a
+ * match, marks bra at the new cursor.  Returns 1 only when an entry
+ * matched and the cursor has reached the backward limit z->lb; returns 0
+ * otherwise.
+ */
+static int r_exception2(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c;                      /* '[' of the slice */
+    found = find_among_b(z, a_8, 8);
+    if (found == 0) return 0;
+    z->bra = z->c;                      /* ']' of the slice */
+    return (z->c > z->lb) ? 0 : 1;      /* must be at the backward limit */
+}
+/*
+ * r_exception1: forward lookup of the words in a_9.
+ *
+ * Succeeds only when an entry matches and the match ends exactly at the
+ * limit z->l.  A positive result code then selects the replacement written
+ * over the matched span; entries with result -1 match no case and leave
+ * the word unchanged.  Returns 1 on success, 0 otherwise.
+ */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+/*
+ * r_postlude: turn every 'Y' written by r_prelude() back into 'y'.
+ *
+ * Returns 0 at once when the Y_found flag B[0] is clear.  Otherwise it
+ * repeatedly searches forward for 'Y' and replaces it by 'y' until the
+ * search reaches the limit, then returns 1.
+ */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0; /* reached the limit: no further 'Y' */
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+/*
+ * english_stem: stem the word currently held in z.
+ *
+ * 1. If r_exception1() succeeds, nothing more is done.
+ * 2. Otherwise the word must offer at least three symbols between the
+ *    cursor and the limit; if it does not, the function returns 0 and the
+ *    word is left untouched.
+ * 3. r_prelude() and r_mark_regions() run forward; processing then switches
+ *    to backward mode (lb = cursor, cursor = limit).
+ * 4. r_Step_1a() runs; then, unless r_exception2() succeeds, Steps 1b, 1c,
+ *    2, 3, 4 and 5 run in that order.  Each step is wrapped so that its
+ *    failure is ignored and the cursor is restored afterwards.
+ * 5. The cursor returns to lb and r_postlude() runs.
+ *
+ * Returns 1 in every case except the short-word exit of step 2.
+ */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/*
+ * english_create_env: allocate an environment sized for the English
+ * stemmer: no string variables, two integers (the marks I[0] and I[1])
+ * and one boolean (the Y_found flag B[0]).  Returns whatever
+ * SN_create_env() returns.
+ */
+extern struct SN_env * english_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 1);
+    return env;
+}
+
+/* english_close_env: release an environment obtained from english_create_env(); simply forwards to SN_close_env(). */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+/*
+ * Public interface of the English stemmer.
+ * NOTE(review): this header includes nothing itself, so struct SN_env
+ * presumably has to be declared (api.h) before it is included -- confirm.
+ */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+extern int english_stem(struct SN_env * z); /* stems the word currently held in z */
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>   /* INT_MAX and INT_MIN, needed by MAXINT / MININT below */
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/*
+ * Size in bytes of the two int header words that the accessors below read
+ * at negative offsets from a string pointer.  Parenthesised so that the
+ * macro expands as a single operand in any expression.
+ */
+#define HEAD (2*sizeof(int))
+
+/*
+ * Accessors for the header words stored immediately before the first
+ * symbol of a string: the size at int index -1, the capacity at -2.
+ */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) ((int *)(p))[-1] = (n)
+#define CAPACITY(p)    ((int *)(p))[-2]
+/* One entry of a lookup table searched by find_among() / find_among_b(); the generated tables initialise the fields in this order. */
+struct among
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    int (* function)(struct SN_env *); /* routine attached to the entry; NOTE(review): its exact use is not shown here -- the generated tables in view all pass 0 */
+};
+/* String allocation and release. */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+/* Grouping tests: s is a grouping table, min and max bound the symbol codes it covers; the generated code uses the _b forms while working backward. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+/* Range tests against the symbol codes min..max (not used by the English stemmer). */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+/* Literal comparison at the cursor: against s_size symbols at s, or against the string p. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+/* Table lookup over v_size entries of v; the generated code treats 0 as no match and otherwise switches on the returned value. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+/* Editing of the current string; the slice_ functions act on the span marked by z->bra and z->ket (NOTE(review): inferred from how the generated code sets the marks before each call). */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+/* Insertion of s_size symbols at s, or of the string p, over the range bra..ket. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+/* Copying out of the current string into p (NOTE(review): exact semantics not shown here). */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+/* Debugging aid (NOTE(review): behaviour not shown here). */
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point: stems the word held in z in place. */
+extern int russian_stem(struct SN_env * z);
+/* Internal routines, one per rule group; each returns 1 if it applied
+   and 0 otherwise.  Defined further down in this file. */
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Create / destroy the environment used by russian_stem(). */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Suffix table searched backwards by r_perfective_gerund().
+   Entries are { s_size, s, substring_i, result, function }.
+   result 1: deleted only when preceded by s_0 or s_1;
+   result 2: deleted unconditionally.
+   NOTE(review): the byte values look like KOI8-R Cyrillic -- confirm. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Suffix table searched backwards by r_adjective(); every entry has
+   result 1, which makes r_adjective() delete the matched suffix. */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Suffix table searched backwards by r_adjectival() after r_adjective()
+   has succeeded.  result 1: deleted only when preceded by s_2 or s_3;
+   result 2: deleted unconditionally. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* Two suffixes searched backwards by r_reflexive(); a match is deleted. */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Suffix table searched backwards by r_verb().
+   result 1: deleted only when preceded by s_4 or s_5;
+   result 2: deleted unconditionally. */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Suffix table searched backwards by r_noun(); every entry has result 1,
+   which makes r_noun() delete the matched suffix. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* Two suffixes searched backwards by r_derivational(); a match is deleted
+   only when r_R2() also holds at the start of the match. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Suffix table searched backwards by r_tidy_up().
+   result 1: delete the suffix, then delete one s_6 if it is followed
+             (moving backwards) by s_7;
+   result 2: delete the matched symbol only if preceded by s_8;
+   result 3: delete unconditionally. */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Grouping bitmap passed to in_grouping()/out_grouping() with the range
+   192..220 in r_mark_regions(): bit (ch - 192) is set for each member.
+   The set bits are 192, 193, 197, 201, 207, 209, 213, 217 and 220.
+   NOTE(review): these appear to be the KOI8-R vowels -- confirm. */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* Single-symbol literals compared with eq_s_b(z, 1, ...):
+   s_0/s_1 in r_perfective_gerund, s_2/s_3 in r_adjectival,
+   s_4/s_5 in r_verb, s_6..s_8 in r_tidy_up, s_9 in russian_stem. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the two region marks and leave the cursor where it was.
+   z->I[0] (pV) becomes the position just after the first g_v member;
+   z->I[1] (p2) the position after the following non-member, member and
+   non-member.  Any mark that cannot be reached keeps the default z->l.
+   Always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    const int start = z->c;
+
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+
+    /* gopast v */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    z->I[0] = z->c; /* setmark pV */
+
+    /* gopast non-v */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    /* gopast v */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    /* gopast non-v */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    z->I[1] = z->c; /* setmark p2 */
+
+restore:
+    z->c = start;
+    return 1;
+}
+
+/* True (1) when the cursor is at or beyond the p2 mark stored in z->I[1]. */
+static int r_R2(struct SN_env * z) {
+    return (z->c >= z->I[1]) ? 1 : 0;
+}
+
+/* Strip a perfective-gerund ending listed in a_0.  Group 1 endings are
+   removed only when preceded by s_0 or s_1; group 2 endings always.
+   Returns 1 if an ending was accepted, 0 otherwise. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int group;
+
+    z->ket = z->c;
+    group = find_among_b(z, a_0, 9);
+    if (group == 0) return 0;
+    z->bra = z->c;
+
+    if (group == 1) {
+        const int mark = z->l - z->c;
+        if (!eq_s_b(z, 1, s_0)) {
+            /* first alternative failed: rewind and try the second */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z);
+    } else if (group == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Delete an adjective ending listed in a_1.  Returns 1 when one was
+   found, 0 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int group;
+
+    z->ket = z->c;
+    group = find_among_b(z, a_1, 26);
+    if (group == 0) return 0;
+    z->bra = z->c;
+
+    if (group == 1) slice_del(z);
+    return 1;
+}
+
+/* Remove an adjective ending and then, optionally, a participle ending
+   from a_2.  Returns 0 if no adjective ending was present, 1 otherwise.
+
+   The participle step is a "try": whenever it fails the cursor must go
+   back to where the try began.  The two saved positions therefore carry
+   distinct names; with a single shadowed name the innermost failure path
+   would rewind only to the start of the "or". */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective */
+    {   int m_try = z->l - z->c; /* try */
+        z->ket = z->c; /* [ */
+        among_var = find_among_b(z, a_2, 8); /* substring */
+        if (!(among_var)) { z->c = z->l - m_try; goto lab0; }
+        z->bra = z->c; /* ] */
+        switch(among_var) {
+            case 1:
+                {   int m_or = z->l - z->c; /* or */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m_or;
+                    /* both alternatives failed: abandon the whole try */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_try; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete */
+                break;
+            case 2:
+                slice_del(z); /* delete */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Delete a reflexive ending listed in a_3.  Returns 1 when one was
+   found, 0 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int group;
+
+    z->ket = z->c;
+    group = find_among_b(z, a_3, 2);
+    if (group == 0) return 0;
+    z->bra = z->c;
+
+    if (group == 1) slice_del(z);
+    return 1;
+}
+
+/* Strip a verb ending listed in a_4.  Group 1 endings are removed only
+   when preceded by s_4 or s_5; group 2 endings always.
+   Returns 1 if an ending was accepted, 0 otherwise. */
+static int r_verb(struct SN_env * z) {
+    int group;
+
+    z->ket = z->c;
+    group = find_among_b(z, a_4, 46);
+    if (group == 0) return 0;
+    z->bra = z->c;
+
+    if (group == 1) {
+        const int mark = z->l - z->c;
+        if (!eq_s_b(z, 1, s_4)) {
+            /* first alternative failed: rewind and try the second */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z);
+    } else if (group == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Delete a noun ending listed in a_5.  Returns 1 when one was found,
+   0 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int group;
+
+    z->ket = z->c;
+    group = find_among_b(z, a_5, 36);
+    if (group == 0) return 0;
+    z->bra = z->c;
+
+    if (group == 1) slice_del(z);
+    return 1;
+}
+
+/* Delete a derivational ending listed in a_6, provided the ending starts
+   at or after the p2 mark (checked by r_R2).  Returns 1 on success,
+   0 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int group;
+
+    z->ket = z->c;
+    group = find_among_b(z, a_6, 2);
+    if (group == 0) return 0;
+    z->bra = z->c;
+
+    if (!r_R2(z)) return 0;
+    if (group == 1) slice_del(z);
+    return 1;
+}
+
+/* Final clean-up driven by table a_7.
+   Group 1: delete the ending, then require s_6 followed by s_7 behind the
+            cursor and delete the s_6 slice.
+   Group 2: delete the matched symbol only if s_8 precedes it.
+   Group 3: delete unconditionally.
+   Returns 1 when the rule ran to completion, 0 otherwise. */
+static int r_tidy_up(struct SN_env * z) {
+    int group;
+
+    z->ket = z->c;
+    group = find_among_b(z, a_7, 4);
+    if (group == 0) return 0;
+    z->bra = z->c;
+
+    if (group == 1) {
+        slice_del(z);
+        z->ket = z->c;
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c;
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z);
+    } else if (group == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z);
+    } else if (group == 3) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Stem the word in z in place.
+
+   The region marks are computed first; the rest runs backwards from the
+   end of the word with the backward limit raised to pV (z->I[0]).
+   Saved positions are kept as distances from z->l, because z->l shrinks
+   whenever a slice is deleted.  Returns 0 only if the cursor lies before
+   pV when the limit is set; otherwise 1. */
+extern int russian_stem(struct SN_env * z) {
+    {   int start = z->c; /* do */
+        (void) r_mark_regions(z);
+        z->c = start;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards */
+
+    {   int m_limit = z->l - z->c; /* setlimit */
+        int saved_lb;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark pV */
+        saved_lb = z->lb; z->lb = z->c;
+        z->c = z->l - m_limit;
+
+        {   int m_do = z->l - z->c; /* do */
+            int m_alt = z->l - z->c; /* or */
+            if (!r_perfective_gerund(z)) {
+                z->c = z->l - m_alt;
+                {   int m_refl = z->l - z->c; /* try */
+                    if (!r_reflexive(z)) z->c = z->l - m_refl;
+                }
+                {   int m_pos = z->l - z->c; /* or */
+                    if (!r_adjectival(z)) {
+                        z->c = z->l - m_pos;
+                        if (!r_verb(z)) {
+                            z->c = z->l - m_pos;
+                            /* last alternative: its outcome does not matter,
+                               the cursor is reset just below either way */
+                            (void) r_noun(z);
+                        }
+                    }
+                }
+            }
+            z->c = z->l - m_do;
+        }
+
+        {   int m_i = z->l - z->c; /* try */
+            z->ket = z->c; /* [ */
+            if (eq_s_b(z, 1, s_9)) {
+                z->bra = z->c; /* ] */
+                slice_del(z); /* delete */
+            } else {
+                z->c = z->l - m_i;
+            }
+        }
+
+        {   int m_der = z->l - z->c; /* do */
+            (void) r_derivational(z);
+            z->c = z->l - m_der;
+        }
+
+        {   int m_tidy = z->l - z->c; /* do */
+            (void) r_tidy_up(z);
+            z->c = z->l - m_tidy;
+        }
+
+        z->lb = saved_lb;
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Build the environment for the Russian stemmer.
+   NOTE(review): the middle argument presumably reserves the two integer
+   slots used as z->I[0] and z->I[1] -- confirm against SN_create_env. */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Release an environment obtained from russian_create_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create / destroy the environment the Russian stemmer works on. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Stem the word held in z in place. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* unless(C) S  executes S when C is false. */
+#define unless(C) if(!(C))
+
+/* Capacity and initial size, in symbols, given to a buffer by create_s(). */
+#define CREATE_SIZE 1
+
+/* Allocate a new symbol buffer with capacity and size CREATE_SIZE.
+   The returned pointer lies HEAD bytes into the malloc block; release it
+   with lose_s().  Returns NULL when memory is exhausted (previously the
+   unchecked malloc result was offset and written through). */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL) return NULL;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Free a buffer obtained from create_s() or increase_size().
+   A NULL p is ignored; without the guard the HEAD offset would turn it
+   into an invalid non-NULL pointer handed to free(). */
+extern void lose_s(symbol * p)
+{   if (p == NULL) return;
+    free((char *) p - HEAD);
+}
+
+/* If the symbol at the cursor lies in min..max and its bit is set in the
+   bitmap s, step the cursor forward and return 1; otherwise return 0. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch, bit;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max || ch < min) return 0;
+    bit = ch - min;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward form of in_grouping: tests the symbol just before the cursor
+   and steps the cursor back on success. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch, bit;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max || ch < min) return 0;
+    bit = ch - min;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* If the symbol at the cursor is NOT a member of the grouping (outside
+   min..max, or its bit in s is clear), step forward and return 1;
+   otherwise return 0. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max && ch >= min)
+    {   int bit = ch - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward form of out_grouping: tests the symbol just before the cursor
+   and steps the cursor back when it is not a grouping member. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max && ch >= min)
+    {   int bit = ch - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Step forward over the symbol at the cursor if it lies in min..max.
+   Returns 1 on success, 0 otherwise. */
+extern int in_range(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (min <= ch && ch <= max)
+    {   z->c++;
+        return 1;
+    }
+    return 0;
+}
+
+/* Step backward over the symbol before the cursor if it lies in
+   min..max.  Returns 1 on success, 0 otherwise. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (min <= ch && ch <= max)
+    {   z->c--;
+        return 1;
+    }
+    return 0;
+}
+
+/* Step forward over the symbol at the cursor if it lies outside
+   min..max.  Returns 1 on success, 0 otherwise. */
+extern int out_range(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (min <= ch && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Step backward over the symbol before the cursor if it lies outside
+   min..max.  Returns 1 on success, 0 otherwise. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (min <= ch && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Match the s_size symbols of s against the text starting at the cursor.
+   On a match the cursor moves past them and 1 is returned; else 0. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{   if (z->l - z->c < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Match the s_size symbols of s against the text ending at the cursor.
+   On a match the cursor moves back over them and 1 is returned; else 0. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{   if (z->c - z->lb < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* eq_s() with the contents of symbol buffer p as the literal. */
+extern int eq_v(struct SN_env * z, symbol * p)
+{   int n = SIZE(p);
+    return eq_s(z, n, p);
+}
+
+/* eq_s_b() with the contents of symbol buffer p as the literal. */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{   int n = SIZE(p);
+    return eq_s_b(z, n, p);
+}
+
+/* Look up the text at the cursor in table v (v_size entries).
+
+   Phase 1 is a binary search over v: i and j bracket the candidate, and
+   common_i / common_j record how many leading symbols are already known
+   to agree with v[i] / v[j], so each probe resumes comparing from the
+   smaller of the two.
+   Phase 2 starts at v[i] and follows substring_i links; an entry is
+   accepted when all s_size of its symbols matched and its optional
+   function, if any, returns non-zero.
+
+   On acceptance the cursor is placed after the matched string and the
+   entry's result is returned; 0 is returned when the chain runs out. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; } /* input exhausted */
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        /* diff < 0: the probe entry sorts after the input, so move j down */
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c + w->s_size; /* undo any cursor movement by function */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply.
+
+   Differences from find_among: symbols are read leftwards from
+   z->p[c - 1], each table string is compared from its last symbol to its
+   first, the limit tested is z->lb, and on acceptance the cursor is
+   placed before the matched string (c - s_size). */
+
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; } /* input exhausted */
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c - w->s_size; /* undo any cursor movement by function */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Move buffer p into a newly allocated buffer able to hold n + 20
+   symbols and return the new buffer; p itself is released.
+
+   The old contents (CAPACITY(p) symbols) and the old size are carried
+   over, so the result is fully initialised even before the caller sets a
+   new size.  If memory is exhausted p is released and NULL is returned
+   instead of writing through an offset NULL pointer. */
+extern symbol * increase_size(symbol * p, int n)
+{   int new_size = n + 20;
+    symbol * q;
+    void * mem = malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    if (mem == NULL) { lose_s(p); return NULL; }
+    q = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(q) = new_size;
+    SET_SIZE(q, SIZE(p));
+    memmove(q, p, CAPACITY(p) * sizeof(symbol)); lose_s(p); return q;
+}
+
+/* Replace the symbols z->p[c_bra..c_ket) by the s_size symbols at s.
+
+   When the length changes the tail is shifted, the buffer is grown if
+   necessary, z->l is adjusted, and the cursor is moved with the tail (if
+   it was at or after c_ket) or pulled back to c_bra (if it was inside the
+   replaced span).  Returns the change in length. */
+
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{   const int old_len = SIZE(z->p);
+    const int adjustment = s_size - (c_ket - c_bra);
+
+    if (adjustment != 0)
+    {   const int new_len = adjustment + old_len;
+        if (new_len > CAPACITY(z->p))
+            z->p = increase_size(z->p, new_len);
+        memmove(z->p + c_ket + adjustment,
+                z->p + c_ket,
+                (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += adjustment;
+        if (z->c >= c_ket)
+            z->c += adjustment;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0)
+        memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return adjustment;
+}
+
+/* Verify 0 <= bra <= ket <= l <= SIZE(p); on violation report the state
+   and terminate the process with exit status 1. */
+static void slice_check(struct SN_env * z)
+{
+    if (z->bra < 0 ||
+        z->bra > z->ket ||
+        z->ket > z->l ||
+        z->l > SIZE(z->p))   /* this last test could be removed */
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+/* Replace the current slice z->bra..z->ket by the s_size symbols at s,
+   after validating the slice bounds. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the contents of symbol buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int n = SIZE(p);
+    slice_from_s(z, n, p);
+}
+
+/* Delete the current slice, i.e. replace it by the empty string. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, 0, 0);
+}
+
+/* Replace z->p[bra..ket) by the s_size symbols at s, then shift the slice
+   marks z->bra and z->ket by the length change if they lie at or after
+   bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    const int delta = replace_s(z, bra, ket, s_size, s);
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* insert_s() with the contents of symbol buffer p as the inserted text. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice z->bra..z->ket into buffer p, growing p when its
+   capacity is too small.  Returns the (possibly reallocated) buffer. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int n;
+    slice_check(z);
+    n = z->ket - z->bra;
+    if (n > CAPACITY(p)) p = increase_size(p, n);
+    memmove(p, z->p + z->bra, n * sizeof(symbol));
+    SET_SIZE(p, n);
+    return p;
+}
+
+/* Copy z->p[0..z->l) into buffer p, growing p when its capacity is too
+   small.  Returns the (possibly reallocated) buffer. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    const int n = z->l;
+    if (n > CAPACITY(p)) p = increase_size(p, n);
+    memmove(p, z->p, n * sizeof(symbol));
+    SET_SIZE(p, n);
+    return p;
+}
+
+/* Dump z->p to stdout, marking the positions of lb '{', bra '[', c '|',
+   ket ']' and l '}'.  Zero symbols are shown as '#'.  When number is
+   non-negative a prefix with number, line_count and the buffer size is
+   printed first. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{
+    const int limit = SIZE(z->p);
+    int pos;
+
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+    for (pos = 0; pos <= limit; pos++)
+    {   if (pos == z->lb) printf("{");
+        if (pos == z->bra) printf("[");
+        if (pos == z->c) printf("|");
+        if (pos == z->ket) printf("]");
+        if (pos == z->l) printf("}");
+        if (pos < limit)
+        {   int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case the NUL-terminated string str in place with tolower() and
+ * return str itself.
+ */
+char*
+lowerstr(char *str) {
+   char *cur;
+
+   for(cur=str; *cur; cur++)
+       *cur = tolower(*(unsigned char*)cur);
+
+   return str;
+}
+
+/*
+ * Release every word stored in the stop list s and the array holding them,
+ * then zero the whole StopList structure.
+ *
+ * The array itself is freed exactly once, after all words have been
+ * released, and the length is tested before the slot is read so that the
+ * loop never looks past the last stored word.
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       while( s->len >0 && *ptr ) {
+           free(*ptr);
+           ptr++; s->len--;
+       }
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Error-path helper for readstoplist(): free the len words collected so far
+ * in stop, the array itself, close the open file, leave *s empty and report
+ * the failure through elog(ERROR).
+ */
+static void
+stoplist_nomem(StopList *s, char **stop, int len, FILE *hin) {
+   while( len > 0 )
+       free(stop[--len]);
+   if ( stop )
+       free(stop);
+   s->stop=NULL;
+   s->len=0;
+   fclose(hin);
+   elog(ERROR,"Not enough memory");
+}
+
+/*
+ * Load a stop-word list into s from the file whose name is given by the
+ * text value in. One word is read per line; empty lines are skipped. When
+ * s->wordop is set it is applied to every word before it is stored.
+ * With a NULL or empty in, s ends up with no words (stop == NULL, len == 0).
+ *
+ * Raises elog(ERROR) when the file cannot be opened or memory runs out.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           int buflen=strlen(buf);
+
+           /*
+            * Strip only a real line terminator: the last line of a file
+            * may lack one and must not lose its final character.
+            */
+           while( buflen>0 && ( buf[buflen-1]=='\n' || buf[buflen-1]=='\r' ) )
+               buf[--buflen] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp)
+                   stoplist_nomem(s, stop, s->len, hin);
+               stop=tmp;
+           }
+
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] )
+               stoplist_nomem(s, stop, s->len, hin);
+           if ( s->wordop )
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++;
+       }
+       fclose(hin);
+       pfree(filename);
+   }
+   s->stop=stop;
+}
+
+/*
+ * qsort()/bsearch() comparator for arrays of C strings: a and b point to
+ * char* elements, which are ordered with strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   const char *left = *(char * const *)a;
+   const char *right = *(char * const *)b;
+
+   return strcmp( left, right );
+}
+
+/*
+ * Sort the words of s with comparestr so that searchstoplist() can use
+ * bsearch() on them. Does nothing for an empty list.
+ */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Tell whether key is one of the words of s. When s->wordop is set it is
+ * applied to key first. The lookup is a bsearch() with comparestr, so the
+ * list is expected to be ordered the way sortstoplist() leaves it.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop )
+       key=(*(s->wordop))(key);
+
+   if ( !s->stop || s->len<=0 )
+       return false;
+
+   if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
+       return true;
+
+   return false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+/*
+ * SPI plans used by the functions below. Each one is prepared on first use
+ * and kept through SPI_saveplan(). current_cfg_id is the id of the currently
+ * selected configuration; 0 means it has not been determined yet
+ * (see get_currcfg()).
+ */
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the tsearch configuration whose oid in pg_ts_cfg is id.
+ *
+ * Two SPI queries are run: the first fetches the parser name of the
+ * configuration, the second fetches, for every token type of that parser,
+ * the array of dictionary names mapped to it in pg_ts_cfgmap. The result is
+ * cfg->map, an array indexed by token type id whose entries list the
+ * dictionaries to try for that token type.
+ *
+ * Parser and dictionary names are copied into TopMemoryContext while SPI is
+ * connected and converted to ids (name2id_prs / name2id_dict) only after
+ * SPI_finish(); the temporary name copies are then released.
+ *
+ * Errors (unknown configuration, SPI failure, out of memory) are reported
+ * through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer(
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
+       );
+       /* the copy is made in TopMemoryContext so it is still usable after SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+
+       cfg->id=id;
+   } else
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second query: $1 is the parser name (text), $2 the configuration oid */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /*
+        * Rows arrive ordered by tokid descending, so the first row carries
+        * the largest token type id and fixes the size of the map.
+        */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) );
+
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* for now the slots hold copies of the dictionary names */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       }
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a )
+           pfree(a);
+   }
+
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every stored dictionary name by the dictionary's id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Cache of the configurations loaded so far.
+ *   last_cfg    - entry returned by the most recent findcfg() call
+ *   len         - number of entries in use in list
+ *   reallen     - number of entries allocated for list
+ *   list        - entries, kept sorted by id (see findcfg())
+ *   name2id_map - configuration name -> id cache used by name2id_cfg()
+ */
+typedef struct {
+   TSCfgInfo   *last_cfg;
+   int     len;
+   int     reallen;
+   TSCfgInfo   *list;
+   SNMap       name2id_map;
+} CFGList;
+
+/* the single cache instance, initially empty */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name -> id map, the dictionary id
+ * arrays and token maps of every cached configuration, and the list itself.
+ * CList is left zeroed, i.e. empty.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is an
+ * unsigned type, so a difference converted to int can have the wrong sign
+ * when the two ids are far apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((TSCfgInfo*)a)->id;
+   Oid idb = ((TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached description of configuration id, loading it with
+ * init_cfg() and adding it to the cache when it is not there yet.
+ * The returned pointer refers to an element of CList.list.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance */
+   /* not cached: grow the list if it is full (16 entries first, then doubling) */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+   /* load the configuration into the first free slot, then restore the ordering */
+   CList.last_cfg=&(CList.list[CList.len]);
+   init_cfg(id, CList.last_cfg);
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   /* the new entry may have moved, so look it up again through the search above */
+   return findcfg(id); /* qsort changed order!! */;
+}
+
+
+/*
+ * Translate a configuration name into its oid in pg_ts_cfg.
+ * The name -> id cache in CList.name2id_map is consulted first; on a miss
+ * the table is queried through SPI and the answer is added to the cache.
+ * Raises elog(ERROR) when no such configuration exists or SPI fails.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   Oid id=findSNMap_t( &(CList.name2id_map), name );
+   
+   /* a non-zero id is treated as a cache hit */
+   if ( id ) 
+       return id;
+   
+   SPI_connect();
+   /* prepare the lookup plan once and keep it for later calls */
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull ) 
+           elog(ERROR, "Null id for tsearch config");
+   } else 
+       elog(ERROR, "No tsearch config");
+   SPI_finish();
+   /* remember the answer for the next call */
+   addSNMap_t( &(CList.name2id_map), name, id );
+   return id;
+}
+
+
+/*
+ * Split the buflen bytes at buf into tokens with the parser of cfg and
+ * append the normalized words to prs.
+ *
+ * For every token the dictionaries mapped to its type in cfg->map are tried
+ * in order; the first one that returns a non-NULL result decides the token:
+ * each returned string becomes one entry of prs->words, all sharing the same
+ * position. Token types beyond cfg->len, and tokens no dictionary
+ * recognizes, add nothing. The returned strings are stored in prs->words
+ * without being copied; only the array that held them is released.
+ *
+ * Raises elog(ERROR) when a token is MAXSTRLEN bytes or longer.
+ */
+void
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 once the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   PointerGetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           /* norms is a NULL-terminated array of normalized forms */
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append a new, zero-initialized word to prs holding a palloc'd copy of the
+ * buflen bytes at buf (no terminator is added) and the token type type.
+ * The word array is doubled as often as needed to make room.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+
+   prs->curwords++;
+}
+
+/*
+ * Link the most recently added word of prs to the query items it matches.
+ *
+ * buf/buflen is a normalized form of that word. Every VAL item of query
+ * whose operand equals it is attached: the first match is stored in the
+ * word itself, each further match gets a copy of the word flagged as
+ * "repeated" and appended to prs->words.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* make room for the worst case: one extra entry per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc() may have moved the array, which
+    * would leave a pointer obtained earlier dangling.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) {
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               /* word is already linked to an item: add a "repeated" copy */
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else
+               word->item=item;
+       }
+       item++;
+   }
+}
+
+/*
+ * Headline variant of parsetext_v2(): split the buflen bytes at buf into
+ * tokens with the parser of cfg and record every token in prs through
+ * hladdword(), whatever its type.
+ *
+ * Tokens whose type is covered by cfg->map are additionally run through the
+ * mapped dictionaries in order; the normalized forms returned by the first
+ * dictionary that knows the token are matched against query with
+ * hlfinditem() and then freed.
+ *
+ * Raises elog(ERROR) when a token is MAXSTRLEN bytes or longer.
+ */
+void
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 once the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       /* token types without a dictionary mapping are kept but never matched */
+       if ( type >= cfg->len )
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   PointerGetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           /* norms is a NULL-terminated array of normalized forms */
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from the words collected in prs.
+ *
+ * Only words flagged "in" and neither "skip" nor "repeated" are emitted:
+ * a word flagged "replace" is emitted as a single space, any other word is
+ * copied as is, wrapped in prs->startsel / prs->stopsel when it is flagged
+ * "selected". The word strings are freed on the way (repeated entries
+ * are left alone). Returns a palloc'd text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   text *out;
+   int len=128;
+   char *ptr;
+   HLWORD  *wrd=prs->words;
+
+   out = (text*)palloc( len );
+   ptr=((char*)out) + VARHDRSZ;
+
+   while( wrd - prs->words < prs->curwords ) {
+       /* double the buffer until the word and both selection markers fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
+           int dist = ptr - ((char*)out);
+           len*= 2;
+           out = (text *) repalloc(out, len);
+           ptr=((char*)out) + dist;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace ) {
+               *ptr=' ';
+               ptr++;
+           } else {
+               if (wrd->selected) {
+                   memcpy(ptr,prs->startsel,prs->startsellen);
+                   ptr+=prs->startsellen;
+               }
+               memcpy(ptr,wrd->word,wrd->len);
+               ptr+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(ptr,prs->stopsel,prs->stopsellen);
+                   ptr+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries were created by copying an existing entry, so they are not freed here */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+
+       wrd++;
+   }
+
+   /* the stored size covers the header plus everything written so far */
+   VARATT_SIZEP(out)=ptr - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the id of the current configuration.
+ * When none has been selected yet (current_cfg_id is 0) the configuration
+ * whose locale column in pg_ts_cfg equals the current LC_CTYPE locale is
+ * looked up through SPI and remembered in current_cfg_id.
+ * Raises elog(ERROR) when no configuration matches the locale or SPI fails.
+ */
+int  
+get_currcfg(void) {
+   Oid arg[1]={ TEXTOID };
+   const char *curlocale;
+   Datum pars[1];
+   bool isnull;
+   int stat;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   /* prepare the lookup plan once and keep it for later calls */
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* a NULL second argument only queries the locale, it does not change it */
+   curlocale = setlocale(LC_CTYPE, NULL);
+   pars[0] = PointerGetDatum( char2text((char*)curlocale) );
+   stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1);
+
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 )
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   pfree(DatumGetPointer(pars[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current one.
+ * findcfg() is called first, so the configuration is loaded into the cache
+ * (and any error raised) before current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+        findcfg(PG_GETARG_OID(0));
+        current_cfg_id=PG_GETARG_OID(0);
+        PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current one.
+ * The name is resolved with name2id_cfg() and the work is delegated to
+ * set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+   
+        DirectFunctionCall1(
+                set_curcfg,
+                ObjectIdGetDatum( name2id_cfg(name) )
+        );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();      
+}       
+
+/* SQL-callable: return the oid of the current configuration, see get_currcfg(). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   PG_RETURN_OID( get_currcfg() ); 
+}
+
+/*
+ * SQL-callable: report "TSearch cache cleaned" through ts_error(NOTICE, ...).
+ * NOTE(review): no cache is reset in this function itself; presumably
+ * ts_error() performs the actual cleanup - confirm against its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/*
+ * Dictionaries mapped to one token type: dict_id is an array of len
+ * dictionary ids stored as Datums, in the order they are tried.
+ */
+typedef struct {
+   int len;
+   Datum   *dict_id;
+} ListDictionary;
+
+/*
+ * One loaded configuration, as filled by init_cfg(): id is the
+ * configuration id, prs_id the id of its parser, and map an array of len
+ * entries indexed by token type id that lists the dictionaries for it.
+ */
+typedef struct {
+   Oid id;
+   Oid prs_id;
+   int len;
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/*
+ * One normalized word produced by parsetext_v2(): word/len give the string
+ * and its length, pos.pos the word position. parsetext_v2() sets alen to 0.
+ * NOTE(review): pos.apos and alen presumably describe an array of positions
+ * built later by other code - not visible here, confirm.
+ */
+typedef struct {
+        uint16          len;
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        char       *word;
+   uint32  alen;
+}       WORD;
+   
+/*
+ * Output of parsetext_v2(): words is an array with room for lenwords
+ * entries of which curwords are in use; pos is the running word position
+ * counter, incremented once per recognized token.
+ */
+typedef struct {
+        WORD       *words;
+        int4            lenwords;
+        int4            curwords;
+   int4        pos;
+}       PRSTEXT;
+
+/*
+ * One token of a text being turned into a headline.
+ *   len, word - the token bytes (stored without a terminator by hladdword())
+ *   type      - token type reported by the parser
+ *   item      - query item the token matches, if any (set by hlfinditem())
+ *   repeated  - entry is an extra copy made for a further matching item
+ *   in, skip, replace, selected - output flags read by genhl(): emit the
+ *               word, suppress it, emit a space instead, wrap it in the
+ *               selection markers
+ */
+typedef struct {
+        uint16    len;
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   uint8   type;
+        char      *word;
+   ITEM      *item;
+}       HLWORD;
+   
+/*
+ * Headline working state: words is an array with room for lenwords entries
+ * of which curwords are in use; startsel/stopsel (startsellen/stopsellen
+ * bytes long) are the markers genhl() puts around selected words.
+ */
+typedef struct {
+        HLWORD       *words;
+        int4            lenwords;
+        int4            curwords;
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+/*
+ * tsstat_in - input function for the tsstat type.
+ *
+ * The incoming argument is never examined: every call yields a freshly
+ * palloc'd, header-only tsstat (len = STATHDRSIZE, size = 0).
+ */
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+Datum
+tsstat_in(PG_FUNCTION_ARGS)
+{
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+/*
+ * tsstat_out - output function for the tsstat type.
+ *
+ * Not implemented: always raises ERROR "Unimplemented".  The
+ * PG_RETURN_NULL() after elog(ERROR) only gives the function a
+ * return statement.
+ */
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * SEI_realloc - create or enlarge an array of WordEntry pointers.
+ *
+ * in   current array, or NULL if none has been allocated yet
+ * len  in/out: capacity of the array in elements
+ *
+ * When there is no array yet (in is NULL or *len is 0) a new array of
+ * 8 elements is palloc'd; otherwise the capacity is doubled with
+ * repalloc.  *len is updated to the new capacity and the (possibly
+ * moved) array is returned.
+ */
+static WordEntry **
+SEI_realloc(WordEntry **in, uint32 *len)
+{
+   bool    fresh = (*len == 0 || in == NULL);
+
+   if (fresh)
+       *len = 8;
+   else
+       *len *= 2;
+
+   if (fresh)
+       return palloc(sizeof(WordEntry *) * (*len));
+
+   return repalloc(in, sizeof(WordEntry *) * (*len));
+}
+
+/*
+ * compareStatWord - order a statistics entry against a tsvector word.
+ *
+ * Entries are ordered by length first; only words of equal length are
+ * compared bytewise (strncmp over a->len bytes).  Returns <0, 0 or >0.
+ *
+ * a     entry of `stat`; its text lives at STATSTRPTR(stat) + a->pos
+ * b     entry of `txt`;  its text lives at STRPTR(txt) + b->pos
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt)
+{
+   if (a->len != b->len)
+       return (a->len > b->len) ? 1 : -1;
+
+   return strncmp(STATSTRPTR(stat) + a->pos,
+                  STRPTR(txt) + b->pos,
+                  a->len);
+}
+
+/*
+ * formstat - build a new tsstat holding every entry of `stat` plus the
+ * `len` words of `txt` that `entry` points to.
+ *
+ * stat   existing statistics (not modified, not freed)
+ * txt    tsvector the new words belong to
+ * entry  array of `len` pointers to WordEntry items of `txt`; the merge
+ *        below expects them in compareStatWord() order
+ * len    number of elements in `entry`
+ *
+ * Layout of the result: the StatEntry array (old and new entries merged
+ * in compareStatWord() order), then the string area, which is the old
+ * string area copied verbatim followed by the text of the new words.
+ * Each new entry starts with ndoc = 1 and nentry = POSDATALEN() of the
+ * word, or 1 when the word carries no positions.
+ *
+ * Returns a freshly palloc'd tsstat.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* number of string bytes contributed by the new words */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings keep their offsets; new strings are appended after them */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary-search its slot and copy around it */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       /* entries that sort before the new word */
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       /* entries that sort after the new word */
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: linear merge of the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* at most one of the two tails below is non-empty */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+/*
+ * ts_accum(tsstat, tsvector) - fold one tsvector into the statistics.
+ *
+ * For every word of the tsvector that is already present in the tsstat,
+ * ndoc is incremented and nentry grows by the word's number of positions
+ * (1 if it has none); these updates are made in place in the passed-in
+ * tsstat.  Words not yet present are collected and handed to formstat(),
+ * which builds a new, larger tsstat.
+ *
+ * Returns the passed-in tsstat when no new word was found, otherwise the
+ * new tsstat from formstat(); the old one is not freed here.
+ *
+ * NOTE(review): argument 1 is detoasted in the declaration below, before
+ * the PG_ARGISNULL(1) test is reached -- confirm that callers never pass
+ * a NULL tsvector.
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both ordered arrays in parallel when the
+    * statistics are smaller than 100 times the tsvector, otherwise
+    * binary-search the statistics for each word of the tsvector.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               /* known word: update its counters in place */
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word sorts before the current entry: it is new */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* statistics exhausted: every remaining word is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   /* leaves StopLow < StopHigh, which marks "found" below */
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   /* newentry points into txt, so txt must stay alive until formstat() is done */
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * State kept across calls of the set-returning functions ts_stat() and
+ * ts_accum_finish().
+ *
+ * The statistics are a tsstat (ts_setup_firstcall() copies one here), so
+ * the member is declared as such rather than as a tsvector pointer; the
+ * STAT* macros and the ->size access work on it unchanged.
+ */
+typedef struct {
+   uint32  cur;        /* index of the next StatEntry to return */
+   tsstat  *stat;      /* private copy of the statistics being returned */
+} StatStorage;
+
+/*
+ * ts_setup_firstcall - prepare the multi-call state of a set-returning
+ * function that emits the contents of `stat`.
+ *
+ * Everything is allocated while funcctx->multi_call_memory_ctx is the
+ * current memory context: a StatStorage with cur = 0 and a byte-for-byte
+ * copy of `stat` (stat->len bytes).  The storage is saved in
+ * funcctx->user_fctx, and the slot and attinmeta are built from the tuple
+ * descriptor named "statinfo".  The previous memory context is restored
+ * before returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   StatStorage     *st;
+   
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   st=palloc( sizeof(StatStorage) );
+   st->cur=0;
+   st->stat=palloc( stat->len );
+   memcpy(st->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+
+/*
+ * ts_process_call - produce the next result row of a statistics SRF.
+ *
+ * Reads the StatStorage saved in funcctx->user_fctx.  While entries
+ * remain, builds a tuple from three C strings -- the word text, ndoc and
+ * nentry -- advances the cursor and returns the tuple as a Datum.  Once
+ * all entries have been returned, frees the stored statistics and the
+ * StatStorage itself and returns (Datum)0, which callers treat as
+ * "no more rows".
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage     *st;
+   st=(StatStorage*)funcctx->user_fctx;
+
+   if ( st->cur < st->stat->size ) {
+       Datum result;
+       char* values[3];
+       char    ndoc[16];
+       char    nentry[16];
+       StatEntry *entry=STATPTR(st->stat) + st->cur;
+       HeapTuple    tuple;
+
+       /* NOTE(review): ndoc/nentry are uint32 but are printed with %d -- confirm intended */
+       values[1]=ndoc;
+       sprintf(ndoc,"%d",entry->ndoc);
+       values[2]=nentry;
+       sprintf(nentry,"%d",entry->nentry);
+       /* the stored word is not NUL-terminated, so make a terminated copy */
+       values[0]=palloc( entry->len+1 );
+       memcpy( values[0], STATSTRPTR(st->stat)+entry->pos, entry->len);
+       (values[0])[entry->len]='\0';
+
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       pfree(values[0]);
+       st->cur++;
+       return result;  
+   } else {
+       pfree(st->stat);
+       pfree(st);
+   }
+   
+   return (Datum)0;
+}
+
+/*
+ * ts_accum_finish(tsstat) - set-returning function that turns an
+ * accumulated tsstat into rows.
+ *
+ * On the first call the tsstat passed as argument 0 is copied into
+ * multi-call memory by ts_setup_firstcall().  Every call then returns the
+ * next row from ts_process_call(), or finishes the set when that returns
+ * (Datum)0.
+ */
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Oid of the tsvector type; looked up once by get_ti_Oid() and then reused */
+static Oid tiOid=InvalidOid;
+
+/*
+ * get_ti_Oid - look up the Oid of the type named 'tsvector' in pg_type
+ * through SPI and store it in tiOid.
+ *
+ * Raises ERROR if the query fails, returns no row, or yields InvalidOid.
+ * The result set is only dereferenced after checking that at least one
+ * row came back (the former "< 0" test could not detect an empty result).
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * ts_stat_sql - run the SQL text in `txt` and accumulate word statistics
+ * over its result.
+ *
+ * The query must return exactly one column of type tsvector; otherwise
+ * ERROR is raised.  Rows are fetched through an SPI cursor in batches of
+ * 100, and every non-NULL value is folded into the statistics with
+ * ts_accum().  Uses SPI calls throughout, so the caller must already be
+ * connected to SPI (ts_stat() does SPI_connect() before calling this).
+ *
+ * Returns the accumulated tsstat (header-only if the query returned no
+ * non-NULL values).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from empty statistics */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       /* fold every row of the current batch into the statistics */
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum() returns a new object when words were added; drop the old one */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+/*
+ * ts_stat(text) - set-returning function exposed to SQL as stat(text).
+ *
+ * On the first call the text argument is executed as a query by
+ * ts_stat_sql() between SPI_connect() and SPI_finish(), and the
+ * resulting statistics are copied into multi-call memory by
+ * ts_setup_firstcall() before SPI_finish() runs.  Every call then
+ * returns the next row from ts_process_call(), or finishes the set when
+ * that returns (Datum)0.
+ *
+ * NOTE(review): the return value of SPI_connect() is not checked.
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/* Statistics for one distinct word stored in a tsstat. */
+typedef struct {
+   uint32  len;    /* length of the word's text in bytes */
+   uint32  pos;    /* offset of the word's text from the start of the string area (STATSTRPTR) */
+   uint32  ndoc;   /* number of accumulated tsvectors that contained the word */
+   uint32  nentry; /* total occurrences: positions per tsvector, counted as 1 when it has none */
+}  StatEntry;
+
+/*
+ * Accumulated word statistics.  After the two header fields, data holds
+ * `size` StatEntry items followed by the string area with the words'
+ * text (see the STAT* macros).
+ */
+typedef struct {
+   int4        len;        /* total size of the object in bytes, header included */
+   int4        size;       /* number of StatEntry items */
+   char        data[1];    /* start of the variable-length payload */
+}  tsstat;
+
+/*
+ * Accessors for the variable-length tsstat layout:
+ *   header (len, size) | StatEntry[size] | string area
+ *
+ * Macro arguments are parenthesized so that expressions can be passed
+ * safely, and the casts use tsstat itself, so this header does not rely
+ * on a tsvector declaration being in scope.
+ */
+/* size of the fixed header: the len and size fields */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* total bytes needed for x entries plus lenstr bytes of word text */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* first StatEntry of the tsstat x */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* start of the string area of the tsstat x */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* size in bytes of the string area of the tsstat x */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, plus an array giving each lexeme's position in that string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower, isdigit, isspace */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * Version-1 function-manager registrations and prototypes for the
+ * SQL-callable entry points defined in this file.
+ */
+
+/* text <-> tsvector conversion (type input/output) */
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+/* build a tsvector from text: by configuration id, current configuration, or configuration name */
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+/* trigger function */
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+/* number of entries in a tsvector */
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort comparator ordering WordEntryPos items by ascending pos.
+ * Equal positions compare as 0, as qsort requires of a consistent
+ * comparator; weight is not part of the ordering.
+ */
+static int
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ * When two items share a pos, the larger weight is kept.  Compaction stops
+ * early once MAXNUMPOS-1 additional items were kept or a kept item has
+ * pos == MAXENTRYPOS-1.  Returns the number of items kept.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *last = a;     /* last item kept so far */
+   WordEntryPos *cur;
+
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (cur = a + 1; cur - a < l; cur++) {
+       if ( cur->pos == last->pos ) {
+           /* same position: only a heavier weight is worth keeping */
+           if ( cur->weight > last->weight )
+               last->weight = cur->weight;
+           continue;
+       }
+
+       last++;
+       last->pos = cur->pos;
+       last->weight = cur->weight;
+       if ( last-a >= MAXNUMPOS-1 || last->pos == MAXENTRYPOS-1 )
+           break;
+   }
+   return last + 1 - a;
+}
+
+static char *BufferStr;
+/*
+ * qsort comparator for WordEntryIN items: shorter lexemes sort first, and
+ * lexemes of equal length are ordered by strncmp over their bytes, which
+ * live in BufferStr at entry.pos.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *lhs = (const WordEntryIN *) a;
+   const WordEntryIN *rhs = (const WordEntryIN *) b;
+
+   if ( lhs->entry.len != rhs->entry.len )
+       return ( lhs->entry.len > rhs->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[lhs->entry.pos],
+                  &BufferStr[rhs->entry.pos],
+                  lhs->entry.len);
+}
+
+/*
+ * Sort and de-duplicate the parsed entries a[0..l-1] in place.
+ *
+ * Entries are ordered by compareentry() (length, then bytes in buf).  The
+ * position lists of duplicate lexemes are concatenated into the surviving
+ * entry, and each surviving list is then passed through uniquePos().  Slot 0
+ * of a position list is reinterpreted as a uint16 holding the number of
+ * positions stored in the slots that follow.
+ *
+ * On return *outbuflen holds the exact number of data bytes needed for the
+ * surviving entries: SHORTALIGN(len) per lexeme plus, for an entry with
+ * positions, (count + 1) WordEntryPos slots -- the extra slot is the count
+ * header that is copied together with the positions.  The value *outbuflen
+ * had on entry is ignored.
+ *
+ * Returns the number of unique entries.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   if (l <= 0) {
+       /* nothing to do; do not touch a[0] */
+       *outbuflen = 0;
+       return 0;
+   }
+
+   res = a;
+   if (l == 1) {
+       *outbuflen = SHORTALIGN(a->entry.len);
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen += (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
+       }
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+   *outbuflen = 0;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* res is complete: finalize its positions and account for it */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           if ( res != ptr )   /* memcpy must not be given overlapping objects */
+               memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* duplicate lexeme carrying positions: merge them into res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* res had no positions yet: take over ptr's list */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* finalize the last surviving entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/*
+ * States of the gettoken_tsvector() scanner.
+ */
+/* skipping spaces, waiting for the first character of a lexeme */
+#define WAITWORD   1
+/* inside an unquoted lexeme */
+#define WAITENDWORD 2
+/* previous character was a backslash: take the next one literally */
+#define WAITNEXTCHAR   3
+/* inside a quoted lexeme, waiting for the closing quote */
+#define WAITENDCMPLX   4
+/* just after a closing quote: a ':' starts position info */
+#define WAITPOSINFO    5
+/* a position number must start here */
+#define INPOSINFO  6
+/* after the first digit of a position: more digits, weight, ',' or end */
+#define WAITPOSDELIM   7
+
+/*
+ * Double the lexeme buffer state->word when fewer than two free bytes remain
+ * after state->curpos, keeping curpos at the same offset.  Expects a variable
+ * named "state" (TI_IN_STATE *) in the expanding scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Scan the next lexeme (and optional position list) from state->prsbuf.
+ *
+ * The lexeme is written NUL-terminated to state->word (grown on demand) with
+ * state->curpos left on the terminator.  A lexeme may be bare or enclosed in
+ * single quotes; a backslash makes the following character literal.
+ *
+ * When state->oprisdelim is false, a ':' after the lexeme introduces a
+ * comma-separated list of positions, each optionally followed by a weight
+ * letter (a or '*', b, c, d).  They are stored in state->pos, whose slot 0
+ * is used as a uint16 count; state->alen is non-zero iff positions were read.
+ * When state->oprisdelim is true, operator characters and ':' end the lexeme
+ * and no position info is read.
+ *
+ * Returns 1 when a lexeme was read, 0 at end of input.  Syntax problems are
+ * reported with elog(ERROR).
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* step over the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           /* ctype functions need a value representable as unsigned char */
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi reads the whole number; remaining digits are skipped in WAITPOSDELIM */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * tsvector_in - input function of the tsvector type.
+ *
+ * Tokenizes the C string argument with gettoken_tsvector(), collecting the
+ * lexeme bytes in a scratch buffer and their descriptors (plus optional
+ * position lists) in an array.  The entries are then sorted and
+ * de-duplicated by uniqueentry() and packed into a freshly allocated
+ * tsvector: header, WordEntry array, then for each lexeme its bytes padded
+ * with SHORTALIGN and, if present, its position list.
+ *
+ * Errors out if a lexeme is MAXSTRLEN bytes or longer, or if a lexeme would
+ * start at an offset that does not fit the 20-bit WordEntry.pos field.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the descriptor array and the scratch buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /* entry.pos is a 20-bit field: offset MAXSTRPOS itself does not fit */
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* empty input: no data area, not the scratch size */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       /* from here on entry.pos is the offset inside the result's data area */
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* count slot plus the positions themselves */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * tsvector_length - return the number of entries (the size field) of the
+ * tsvector argument.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before the detoasted copy may be released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * tsvector_out - output function of the tsvector type.
+ *
+ * Each entry is printed as a single-quoted lexeme, entries separated by one
+ * space.  Quote and backslash characters inside a lexeme are preceded by a
+ * backslash, because gettoken_tsvector() treats a backslash as an escape for
+ * the following character; leaving backslashes bare would make the text
+ * unreadable by tsvector_in.  If the entry has positions they follow as
+ * ":pos[weight],pos[weight],..." with weight printed as A, B or C (weight 0
+ * prints nothing).
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   /*
+    * Upper bound of the output size: quotes and separators per entry, two
+    * bytes per lexeme byte (room for an escape), seven bytes per position.
+    */
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += ptr[i].len*2 /*for escape */;
+       if ( ptr[i].haspos )
+           lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'' || *curin == '\\')
+           {
+               int4        pos = curout - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               /* sprintf returns the number of characters written */
+               curout += sprintf(curout,"%d",wptr->pos);
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort comparator for WORD items: shorter words first, then by strncmp of
+ * the word bytes, then by ascending pos.pos.  Items equal in all three
+ * compare as 0, as qsort requires of a consistent comparator.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+
+   if (wa->len == wb->len) {
+       int res = strncmp(
+                      wa->word,
+                      wb->word,
+                      wb->len);
+       if ( res==0 ) {
+           if ( wa->pos.pos == wb->pos.pos )
+               return 0;
+           return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+       }
+       return res;
+   }
+   return (wa->len > wb->len) ? 1 : -1;
+}
+
+/*
+ * Give w a freshly allocated two-slot position array: slot 0 is the number
+ * of positions (1) and slot 1 is firstpos.  pos.pos and pos.apos are both
+ * members of w->pos, so the caller must read pos.pos before calling.
+ */
+static void
+startPosList(WORD * w, int firstpos)
+{
+   w->alen=2;
+   w->pos.apos=(uint16*)palloc( sizeof(uint16)*w->alen );
+   w->pos.apos[0]=1;
+   w->pos.apos[1]=firstpos;
+}
+
+/*
+ * Sort the l parsed words in a[] and merge duplicates in place.
+ *
+ * Every surviving word gets a position array (pos.apos) whose slot 0 counts
+ * the positions stored after it.  Positions of duplicates are appended to
+ * the survivor until it holds MAXNUMPOS-1 positions or its last position is
+ * MAXENTRYPOS-1; the word string of each duplicate is freed.
+ *
+ * Returns the number of unique words.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *kept = a;   /* last unique word so far */
+   WORD       *scan;
+
+   /* a single word needs no sorting */
+   if (l != 1)
+       qsort((void *) a, l, sizeof(WORD), compareWORD);
+
+   startPosList(a, LIMITPOS(a->pos.pos));
+
+   for (scan = a + 1; scan - a < l; scan++)
+   {
+       bool        sameword = (scan->len == kept->len &&
+                               strncmp(scan->word, kept->word, kept->len) == 0);
+
+       if (!sameword)
+       {
+           /* a new word: move it down next to the previous survivor */
+           kept++;
+           kept->len = scan->len;
+           kept->word = scan->word;
+           startPosList(kept, LIMITPOS(scan->pos.pos));
+           continue;
+       }
+
+       /* duplicate: its string is not needed, only (maybe) its position */
+       pfree(scan->word);
+       if ( kept->pos.apos[0] < MAXNUMPOS-1 && kept->pos.apos[ kept->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+           if ( kept->pos.apos[0]+1 >= kept->alen ) {
+               kept->alen*=2;
+               kept->pos.apos=(uint16*)repalloc( kept->pos.apos, sizeof(uint16)*kept->alen );
+           }
+           kept->pos.apos[ kept->pos.apos[0]+1 ] = LIMITPOS(scan->pos.pos);
+           kept->pos.apos[0]++; 
+       }
+   }
+
+   return kept + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * Builds a tsvector from the words collected in prs.  The words are first
+ * sorted and merged by uniqueWORD(); then header, WordEntry array and data
+ * area are filled in.  All positions are stored with weight 0.
+ *
+ * prs is consumed: every word string, every position array and prs->words
+ * itself are freed.  Errors out if a lexeme would start at an offset that
+ * does not fit the 20-bit WordEntry.pos field.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+   /* size of the data area: padded lexemes plus count + position lists */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /* pos is a 20-bit field: offset MAXSTRPOS itself does not fit */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* count first, then the positions right behind it */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text) - parse the text with the configuration found
+ * by findcfg() and return the resulting tsvector.  Text that yields no
+ * words produces an empty tsvector (size 0).
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *txt = PG_GETARG_TEXT_P(1);
+   PRSTEXT     parsed;
+   tsvector       *result = NULL;
+   TSCfgInfo *cfg=findcfg(PG_GETARG_INT32(0)); 
+
+   /* start with room for 32 words */
+   parsed.lenwords = 32;
+   parsed.curwords = 0;
+   parsed.pos = 0;
+   parsed.words = (WORD *) palloc(sizeof(WORD) * parsed.lenwords);
+   
+   parsetext_v2(cfg, &parsed, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+   PG_FREE_IF_COPY(txt, 1);
+
+   if (parsed.curwords == 0) {
+       /* nothing indexable: header-only value */
+       pfree(parsed.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   } else
+       result = makevalue(&parsed);
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg_name, text) - translate the configuration name to its id
+ * with name2id_cfg() and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname=PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);   
+}
+
+/*
+ * to_tsvector(text) - delegate to to_tsvector() using the configuration id
+ * returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(result);   
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * TEXTOID.  Returns the oid of the first such candidate, or InvalidOid if
+ * there is none.  The candidate list is freed before returning.
+ */
+static Oid
+findFunc(char *fname) {
+   List *names=makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(names, 1);
+   Oid found = InvalidOid;
+
+   freeList(names);
+
+   while ( cand != NULL ) {
+       FuncCandidateList next = cand->next;
+
+       /* keep only the first match, but still walk on to free the rest */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * tsearch2(tsvector_field, text_field1, ...) - row-level BEFORE trigger for
+ * INSERT and UPDATE.  The first trigger argument names the column that
+ * receives the tsvector; each further argument names either a column of
+ * type text, varchar or char, or a one-argument function taking text (see
+ * findFunc()).  The named columns of the new row are parsed with the
+ * current configuration and the resulting tsvector is stored into the
+ * target column with SPI_modifytuple().
+ *
+ * Returns the modified tuple.  Raises an error when not called as such a
+ * trigger, when fewer than two arguments are given, or when an argument
+ * names neither a column nor a suitable function.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   /* NOTE(review): the configuration is looked up before the CALLED_AS_TRIGGER check below */
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the row version that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* first argument: the column that receives the tsvector */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /*
+            * Not a column: treat the argument as a function name.  funcoid
+            * is not reset inside the loop, so the function stays in effect
+            * for every column argument that follows it.
+            */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * If detoasting made no copy, record the function result as the
+            * "original" so the comparison below does not free it.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when it differs from the pointer remembered in txt_toasted */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store an empty (header-only) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Descriptor of one lexeme: its length, its byte offset inside the data
+ * area of the tsvector, and whether a position list follows the lexeme.
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme: a 14-bit position and a 2-bit weight.
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+/* clamp a position to the largest value that fits WordEntryPos.pos */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * The tsvector value: len is the total size in bytes, size the number of
+ * WordEntry items; data holds the WordEntry array followed by the data area
+ * (lexeme bytes and position lists).
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Layout helpers.  x is a pointer to a tsvector, e a pointer to one of its
+ * WordEntry items.  Every macro parameter is parenthesized so that
+ * expression arguments (e.g. "cur-data") expand as intended.
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+/* total size for x entries and lenstr bytes of data area */
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+/* start of the WordEntry array */
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+/* start of the data area; depends on the current value of size */
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* size of the data area in bytes */
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* the uint16 position count stored right after the padded lexeme */
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+/* number of positions of entry e, 0 if it has none */
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+/* first WordEntryPos of entry e (just behind the count) */
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Entry as collected while parsing input text: the descriptor plus a
+ * separately allocated position list whose slot 0 is used as a uint16 count.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * State of the gettoken_tsvector() scanner.
+ */
+typedef struct
+{
+   char       *prsbuf;     /* current read position in the input string */
+   char       *word;       /* buffer receiving the current lexeme */
+   char       *curpos;     /* write position inside word */
+   int4        len;        /* allocated size of word */
+   int4        state;      /* current scanner state */
+   int4        alen;       /* allocated slots in pos; 0 if no positions were read */
+   WordEntryPos    *pos;   /* positions of the current lexeme; slot 0 is the count */
+   bool        oprisdelim; /* operator characters end a lexeme; no position info is read */
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * Version-1 function-manager registrations and prototypes for the
+ * SQL-callable entry points defined in this file.
+ */
+
+/* remove position information from a tsvector */
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+/* assign one weight to every position of a tsvector */
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+/* merge two tsvectors */
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector) - return a copy of the argument that keeps every lexeme
+ * but carries no position information (haspos is cleared on all entries).
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* the result's data area holds the padded lexemes only */
+   for(i=0;i<in->size;i++) 
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char") - return a copy of the tsvector in which every
+ * position carries the given weight.  The weight letter is matched
+ * case-insensitively: a=3, b=2, c=1, d=0; any other letter raises
+ * "Unknown weight".
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* tolower needs a value representable as unsigned char */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;    
+   while(i--) {
+       /* entries without positions have nothing to re-weight */
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two lexemes that may belong to different tsvectors; ptra and ptrb
+ * are the data areas that a->pos and b->pos are offsets into.  Shorter
+ * lexemes sort first; equal lengths are decided by strncmp.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position list of
+ * destptr (an entry of dest), adding maxpos to each position and clamping
+ * the sum with LIMITPOS.  If destptr has no positions yet its count is
+ * started at 0.
+ *
+ * Copying stops when the source is exhausted, when the destination holds
+ * MAXNUMPOS positions, or when the last stored position is already
+ * MAXENTRYPOS-1.  haspos is set on destptr if anything was appended.
+ *
+ * Returns the number of positions appended.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector) - merge two tsvectors into one.
+ *
+ * The entry arrays of both inputs are walked in parallel in compareEntry()
+ * order.  Lexemes present in only one input are copied; lexemes present in
+ * both are emitted once with the position lists joined.  Positions taken
+ * from the second input are shifted by the largest position found in the
+ * first input (see add_pos()).
+ *
+ * The result is allocated with the summed size of both inputs and laid out
+ * as if it had in1->size + in2->size entries; once the real entry count is
+ * known the data area is moved down next to the shortened entry array.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   /* provisional count: STRPTR(out) and the POSDATA macros depend on it */
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   /* merge while both inputs still have entries */
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* in1's positions are copied unchanged, count included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* in2's positions are shifted by maxpos while being copied */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: emit once, join the position lists */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count slot is already in place, so only the appended positions advance cur */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* remaining entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* remaining entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /* fix up the real entry count and total length */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   /* fewer entries than provisionally assumed: close the gap before the data area */
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- WARNING: this script is destructive.
+-- It drops all indexes, triggers and columns that use the types defined
+-- in tsearch2.sql.
+
+
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+DROP TYPE tokentype CASCADE; -- CASCADE also drops the objects that depend on the type
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+const char *lex_descr[]={ /* token type descriptions, indexed by the token ids of deflex.h */
+   "", /* 0: placeholder, deflex.h defines no token type 0 */
+   "Latin word", /* LATWORD */
+   "Non-latin word", /* CYRWORD */
+   "Word", /* UWORD */
+   "Email", /* EMAIL */
+   "URL", /* FURL */
+   "Host", /* HOST */
+   "Scientific notation", /* SCIENTIFIC */
+   "VERSION", /* VERSIONNUMBER */
+   "Part of hyphenated word", /* PARTHYPHENWORD */
+   "Non-latin part of hyphenated word", /* CYRPARTHYPHENWORD */
+   "Latin part of hyphenated word", /* LATPARTHYPHENWORD */
+   "Space symbols", /* SPACE */
+   "HTML Tag", /* TAG */
+   "HTTP head", /* HTTP */
+   "Hyphenated word", /* HYPHENWORD */
+   "Latin hyphenated word", /* LATHYPHENWORD */
+   "Non-latin hyphenated word", /* CYRHYPHENWORD */
+   "URI", /* URI */
+   "File or path name", /* FILEPATH */
+   "Decimal notation", /* DECIMAL */
+   "Signed integer", /* SIGNEDINT */
+   "Unsigned integer", /* UNSIGNEDINT */
+   "HTML Entity" /* HTMLENTITY == LASTNUM */
+};
+
+const char *tok_alias[]={ /* short token type aliases, indexed by the token ids of deflex.h */
+   "", /* 0: placeholder, deflex.h defines no token type 0 */
+   "lword", /* LATWORD */
+   "nlword", /* CYRWORD */
+   "word", /* UWORD */
+   "email", /* EMAIL */
+   "url", /* FURL */
+   "host", /* HOST */
+   "sfloat", /* SCIENTIFIC */
+   "version", /* VERSIONNUMBER */
+   "part_hword", /* PARTHYPHENWORD */
+   "nlpart_hword", /* CYRPARTHYPHENWORD */
+   "lpart_hword", /* LATPARTHYPHENWORD */
+   "blank", /* SPACE */
+   "tag", /* TAG */
+   "http", /* HTTP */
+   "hword", /* HYPHENWORD */
+   "lhword", /* LATHYPHENWORD */
+   "nlhword", /* CYRHYPHENWORD */
+   "uri", /* URI */
+   "file", /* FILEPATH */
+   "float", /* DECIMAL */
+   "int", /* SIGNEDINT */
+   "uint", /* UNSIGNEDINT */
+   "entity" /* HTMLENTITY == LASTNUM */
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/* remember: LASTNUM must stay equal to the highest token id defined below */
+#define LASTNUM        23
+/* token type ids; the scanner rules in parser.l return these values */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+extern const char *lex_descr[]; /* description of each token type, indexed by token id */
+extern const char *tok_alias[]; /* short alias of each token type, indexed by token id */
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+/*
+ * Interface of the scanner generated from parser.l.
+ *
+ * NOTE(review): token and tokenlen are declared here without "extern", so
+ * every file that includes this header gets its own tentative definition,
+ * and parser.l additionally defines token with an initializer.  This only
+ * links when the toolchain merges common symbols; consider "extern"
+ * declarations here plus exactly one definition of each variable in
+ * parser.l.
+ */
+char      *token; /* text of the token most recently produced by the scanner */
+int            tokenlen; /* length of that token, as set by the scanner rules */
+int            tsearch2_yylex(void); /* scan the next token; returns its type id (see deflex.h) */
+void       start_parse_str(char *, int); /* start scanning the given number of bytes of a string */
+void       start_parse_fh(FILE *, int); /* start scanning a file handle; a limit of 0 means no read limit */
+void       end_parse(void); /* release the scanner buffer and the saved token copy */
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: three-argument fprintf() calls in
+ * the generated scanner are redirected to ts_error().
+ * (presumably ts_error(ERROR, ...) does not return -- confirm in common.h)
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* make the generated scanner use the postgres allocation functions */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the text of the current token */
+char *s     = NULL;  /* private copy used to return a WHOLE hyphenated word or URL as one token */
+
+YY_BUFFER_STATE buf = NULL; /* current scanner buffer; created by start_parse_str()/start_parse_fh(), released by end_parse() */
+
+int lrlimit = -1;  /* number of bytes that may still be read from the file handle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next fread() request made by YY_INPUT */
+
+/*
+ * Redefine YY_INPUT so that reads from a file handle honour lrlimit:
+ * at most lrlimit bytes are read in total, lrlimit == 0 reports end of
+ * input, and a negative lrlimit reads without a limit.
+ * The interactive branch reads up to and including the next newline.
+ * Note that the fread() call belongs to the outer "else" block; despite its
+ * indentation it is not part of the inner "else".
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: release the saved token copy (if any) and the
+ * scanner buffer.
+ */
+void end_parse() {
+   if (s != NULL) {
+       free(s);
+       s = NULL;
+   }
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Start parsing `limit` bytes of the string `str`.
+ * A parse that is still open is closed first.
+ */
+void start_parse_str(char* str, int limit) {
+   if (buf != NULL)
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Start parsing from the file handle `fh`.
+ * `limit` becomes the read limit used by YY_INPUT; a limit of 0 selects
+ * -1, i.e. unlimited reading.  A parse that is still open is closed first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if (buf != NULL)
+       end_parse();
+
+   if ( limit != 0 )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the parser described by the pg_ts_parser row whose oid
+ * is `id`.
+ *
+ * *prs is zeroed first.  The row is fetched through SPI with a plan that is
+ * prepared and saved on the first call.  Columns 1, 2, 3 and 5 (prs_start,
+ * prs_nexttoken, prs_end, prs_headline) are resolved with fmgr_info_cxt()
+ * against TopMemoryContext; column 4 (prs_lextype) is stored as a plain oid.
+ *
+ * Errors are reported through ts_error(ERROR, ...): when the plan cannot be
+ * prepared, when SPI_execp() fails, and when no row has the given oid.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   /* prepare the lookup plan once and keep it for later calls */
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       /* the lextype function is kept as an oid, not as an FmgrInfo */
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is an unsigned type, so it is printed with %u rather than %d */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+typedef struct {
+   WParserInfo *last_prs; /* entry most recently looked up by findprs(), or NULL */
+   int     len; /* number of entries of list that are in use */
+   int     reallen; /* number of entries allocated for list */
+   WParserInfo *list; /* loaded parsers, kept sorted by prs_id */
+   SNMap       name2id_map; /* parser name -> oid map used by name2id_prs() */
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}}; /* file-local cache of loaded parsers */
+
+/*
+ * Drop the parser cache: free the name-to-id map and the parser array,
+ * then return PList to its all-zero initial state.
+ */
+void    
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL )
+       free(PList.list);
+
+   memset(&PList, 0, sizeof(PrsList));
+}
+
+/*
+ * qsort()/bsearch() comparator: orders WParserInfo entries by prs_id.
+ *
+ * Oid is an unsigned type, so the difference of two ids must not be
+ * returned directly: it wraps around and, once converted to int, can carry
+ * the wrong sign for ids that are far apart.  The ids are compared
+ * explicitly instead.
+ *
+ * Returns a negative value, zero or a positive value when a sorts before,
+ * equal to or after b.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid     ida = ((const WParserInfo*)a)->prs_id;
+   Oid     idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   if ( ida > idb )
+       return 1;
+   return 0;
+}
+/*
+ * Return the cached WParserInfo of parser `id`, loading it with init_prs()
+ * the first time the id is requested.
+ *
+ * The cache is an array sorted by prs_id: adding a parser re-sorts it and
+ * may reallocate it.
+ * NOTE(review): pointers returned by earlier calls therefore look unsafe to
+ * keep across a later call that adds a new parser -- confirm with callers.
+ */
+WParserInfo *
+findprs(Oid id) {
+   /* fast path: same parser as the previous lookup */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* parser already loaded: binary search in the sorted array */
+   if ( PList.len != 0 ) {
+       WParserInfo key;
+       key.prs_id=id;
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* not loaded yet: grow the array when it is full (16 entries at first, then doubling) */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+   PList.last_prs=&(PList.list[PList.len]); /* load into the first free slot */
+   init_prs(id, PList.last_prs);
+   PList.len++; /* only counted once init_prs() has returned */
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return findprs(id); /* qsort changed order!! so the entry is looked up again */;
+}
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * PList.name2id_map is consulted first; on a miss the oid is read through
+ * SPI (with a plan prepared and saved on first use) and added to the map.
+ * ts_error(ERROR, ...) is called when no parser has that name.
+ */
+static void *plan_name2id=NULL;
+
+Oid
+name2id_prs(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   Oid id=findSNMap_t( &(PList.name2id_map), name );
+   
+   if ( id ) /* a non-zero id means the name was found in the map */
+       return id;
+   
+
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 )
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   SPI_finish();
+   addSNMap_t( &(PList.name2id_map), name, id ); /* remember the result for later calls */
+   return id;
+}
+
+
+/******sql-level interface******/
+typedef struct {
+   int     cur; /* index of the next list entry to return */
+   LexDescr    *list; /* array returned by the parser's lextype function */
+} TypeStorage;
+/*
+ * First-call setup shared by the token_type* functions.
+ *
+ * Calls the lextype function of parser `prsid` and stores the returned
+ * LexDescr array, together with a cursor, in funcctx->user_fctx.  The
+ * tuple slot and attribute metadata are built from the relation
+ * "tokentype".  Everything after the context switch is allocated in
+ * funcctx->multi_call_memory_ctx.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   TypeStorage     *st;
+   WParserInfo *prs = findprs(prsid); 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->cur=0;
+   st->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
+   );
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the token_type* functions.
+ *
+ * Returns the next LexDescr entry as a (lexid, alias, descr) tuple datum
+ * and frees that entry's two strings.  Once the list is exhausted (or
+ * missing) the storage is freed and (Datum)0 is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *storage = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   Datum       result;
+   char        *values[3];
+   char        txtid[16];
+   HeapTuple   tuple;
+
+   /* nothing left to return: release the storage and signal the end */
+   if ( !storage->list || !storage->list[storage->cur].lexid ) {
+       if ( storage->list )
+           pfree(storage->list);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   entry = &(storage->list[storage->cur]);
+
+   sprintf(txtid,"%d",entry->lexid);
+   values[0] = txtid;
+   values[1] = entry->alias;
+   values[2] = entry->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[1]);
+   pfree(values[2]);
+   storage->cur++;
+   return result;
+}
+/*
+ * SQL-callable set-returning function: returns one tuple per token type of
+ * the parser whose oid is argument 0.
+ */
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) { 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=process_call(funcctx)) != (Datum)0 ) /* (Datum)0 means there are no more entries */
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+/*
+ * SQL-callable set-returning function: like token_type(), but the parser
+ * is given by name (argument 0) and resolved with name2id_prs().
+ */
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0); /* the name is only needed for the lookup above */
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=process_call(funcctx)) != (Datum)0 ) /* (Datum)0 means there are no more entries */
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+/*
+ * SQL-callable set-returning function: like token_type(), but for the
+ * current parser.  When no current parser has been set yet, the parser
+ * named "default" is looked up and becomes the current one.
+ */
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=process_call(funcctx)) != (Datum)0 ) /* (Datum)0 means there are no more entries */
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+/*
+ * SQL-callable: make the parser with the given oid (argument 0) the
+ * current parser.  findprs() runs first, so the parser is looked up (and
+ * loaded into the cache if needed) before current_parser_id is changed.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid     prsid = PG_GETARG_OID(0);
+
+   findprs(prsid);
+   current_parser_id = prsid;
+   PG_RETURN_VOID();
+}
+/*
+ * SQL-callable: like set_curprs(), but the parser is given by name
+ * (argument 0); the name is resolved with name2id_prs() and the work is
+ * delegated to set_curprs().
+ */
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+    
+        DirectFunctionCall1(
+                set_curprs,
+                ObjectIdGetDatum( name2id_prs(name) )
+        );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+typedef struct {
+   int type; /* token type id returned by the parser's getlexeme function */
+   char    *lexem; /* NUL-terminated copy of the token text */
+} LexemEntry;
+
+typedef struct {
+   int cur; /* index of the next entry to fill or to return */
+   int len; /* allocated entries while collecting, then number of stored entries */
+   LexemEntry  *list; /* the collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by parse(), parse_byname() and parse_current().
+ *
+ * Runs parser `prsid` over the whole of `txt` immediately and stores every
+ * (type, lexeme) pair in a PrsStorage kept in funcctx->user_fctx;
+ * prs_process_call() then returns one stored entry per call.  Each lexeme
+ * is copied into its own NUL-terminated buffer, so the storage does not
+ * point into `txt`.  The tuple slot and attribute metadata are built from
+ * the relation "tokenout".
+ *
+ * prsid is declared as Oid (it used to be int): every caller passes an Oid
+ * and the value is handed to findprs(Oid), as in setup_firstcall().
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text payload (data pointer and byte length) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* collect tokens until the parser returns type 0 */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* double the array when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the number of stored tokens and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the parse* functions.
+ *
+ * Returns the next stored token as a (type, lexeme) tuple datum and frees
+ * the lexeme copy.  Once all tokens have been returned the storage is
+ * freed and (Datum)0 is returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *storage = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   Datum       result;
+   char        *values[2];
+   char        tid[16];
+   HeapTuple   tuple;
+
+   /* every token has been returned: release the storage and signal the end */
+   if ( storage->cur >= storage->len ) {
+       if ( storage->list )
+           pfree(storage->list);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   entry = &(storage->list[storage->cur]);
+
+   sprintf(tid,"%d",entry->type);
+   values[0] = tid;
+   values[1] = entry->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[1]);
+   storage->cur++;
+   return result;
+}
+
+           
+/*
+ * SQL-callable set-returning function: parses the text in argument 1 with
+ * the parser whose oid is argument 0 and returns one tuple per token.
+ */
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1); /* all tokens were copied during setup */
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 ) /* (Datum)0 means there are no more tokens */
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+/*
+ * SQL-callable set-returning function: like parse(), but the parser is
+ * given by name (argument 0); the text to parse is argument 1.
+ */
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1); /* all tokens were copied during setup */
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 ) /* (Datum)0 means there are no more tokens */
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * SQL-callable set-returning function: like parse(), but uses the current
+ * parser; the text to parse is argument 0.  When no current parser has been
+ * set yet, the parser named "default" is looked up and becomes current.
+ */
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0); /* all tokens were copied during setup */
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 ) /* (Datum)0 means there are no more tokens */
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+/*
+ * SQL-callable: builds a headline for the text in argument 1 and the query
+ * in argument 2, using the configuration whose oid is argument 0.
+ * Argument 3, when present and non-NULL, is an options text that is passed
+ * on to the parser's headline function.
+ *
+ * Steps: hlparsetext() fills prs from the text, the headline function of
+ * the configuration's parser is called with (&prs, opt, query), and
+ * genhl() produces the resulting text.
+ *
+ * NOTE(review): prs.startsel and prs.stopsel are freed unconditionally;
+ * presumably the headline function always sets them -- confirm.
+ */
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32; /* initial size of the words array */
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * SQL-callable: like headline(), but the configuration is given by name
+ * (argument 0) and resolved with name2id_cfg().  The remaining arguments
+ * are forwarded to headline(); a NULL pointer datum stands in for the
+ * options when fewer than four arguments were passed.
+ */
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg=PG_GETARG_TEXT_P(0);
+
+   Datum out=DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);   
+}
+/*
+ * SQL-callable: like headline(), but uses the configuration returned by
+ * get_currcfg().  Arguments 0 and 1 are the text and the query; argument 2,
+ * when passed, is the options text (otherwise a NULL pointer datum is used).
+ */
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   PG_RETURN_DATUM(DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
+   ));
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+typedef struct {
+   Oid prs_id; /* oid of the parser's pg_ts_parser row */
+   FmgrInfo start_info; /* the parser's prs_start function */
+   FmgrInfo getlexeme_info; /* the parser's prs_nexttoken function */
+   FmgrInfo end_info; /* the parser's prs_end function */
+   FmgrInfo headline_info; /* the parser's prs_headline function */
+   Oid lextype; /* oid of the parser's prs_lextype function */
+   void *prs; /* state pointer returned by the start function */
+} WParserInfo;
+
+void init_prs(Oid id, WParserInfo *prs); /* load a parser description from pg_ts_parser */
+WParserInfo* findprs(Oid id); /* cached lookup of a parser by oid */
+Oid name2id_prs(text *name); /* parser name -> oid */
+void   reset_prs(void); /* drop the parser cache */
+
+
+typedef struct {
+   int lexid; /* token type id; 0 marks the end of a LexDescr array */
+   char    *alias; /* short name of the token type */
+   char    *descr; /* description of the token type */
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+/*
+ * prsd_lextype: describe the token types of the default parser.
+ *
+ * Returns a palloc'd array of LASTNUM+1 LexDescr entries.  Entries
+ * 0..LASTNUM-1 describe token ids 1..LASTNUM (alias and description are
+ * copied from tok_alias[] and lex_descr[]); the final entry has lexid 0
+ * and marks the end of the array.
+ */
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+Datum
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *table=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   LexDescr *slot=table;
+   int tokid;
+
+   for(tokid=1;tokid<=LASTNUM;tokid++,slot++) {
+       slot->lexid = tokid;
+       slot->alias = pstrdup(tok_alias[tokid]);
+       slot->descr = pstrdup(lex_descr[tokid]);
+   }
+
+   /* slot now addresses entry LASTNUM: write the terminator */
+   slot->lexid=0;
+
+   PG_RETURN_POINTER(table);
+}
+
+/*
+ * prsd_start: begin parsing a string with the default parser.
+ *
+ * Argument 0 is a pointer to the text buffer and argument 1 its int32
+ * length; both are handed to start_parse_str().  Always returns a NULL
+ * pointer.
+ */
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+Datum
+prsd_start(PG_FUNCTION_ARGS) {
+   char *buf=(char*)PG_GETARG_POINTER(0);
+
+   start_parse_str( buf, PG_GETARG_INT32(1) );
+   PG_RETURN_POINTER(NULL);
+}
+
+/*
+ * prsd_getlexeme: fetch the next token from the default parser.
+ *
+ * Runs the lexer once, then stores the lexer's current token pointer and
+ * length through the out-parameters passed as arguments 1 (char **) and
+ * 2 (int *).  The lexer's return value is returned as the int32 result.
+ * Argument 0 is not used.
+ */
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+Datum
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tokptr=(char**)PG_GETARG_POINTER(1);
+   int *lenptr=(int*)PG_GETARG_POINTER(2);
+   int toktype;
+
+   /* the lexer must run before token/tokenlen are read */
+   toktype=tsearch2_yylex();
+
+   *tokptr = token;
+   *lenptr = tokenlen;
+   PG_RETURN_INT32(toktype);
+}
+
+/*
+ * prsd_end: finish a parse started with prsd_start.
+ * Calls end_parse() and returns void; argument 0 is not used.
+ */
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse();
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class predicates used by the headline code below.  The numbers
+ * are token type ids of the default parser, as listed by
+ * token_type('default'):
+ *    5 url      7 sfloat   8 version  12 blank   13 tag     14 http
+ *   15 hword   16 lhword  17 nlhword  20 float   21 int     22 uint
+ *   23 entity
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+/* IDIGNORE: tag, http, blank, entity */
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* HLIDIGNORE: url, tag and the three hyphenated-word types; prsd_headline marks these words with replace=1 */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* NONWORDTOKEN: blank plus everything in HLIDIGNORE; such tokens are not counted as words of a headline */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* NOENDTOKEN: token types a headline should not end on (non-words, numeric types, version, IDIGNORE) */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* Window of headline words handed to TS_execute() as its check value. */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * TS_execute() callback: true when the query operand `val` is the item
+ * of at least one word inside the hlCheck window passed as `checkval`.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window=(hlCheck*)checkval;
+   int idx;
+
+   for(idx=0;idx<window->len;idx++)
+       if ( window->words[idx].item==val )
+           return true;
+
+   return false;
+}
+
+
+/*
+ * hlCover: find the next "cover" -- a window of prs->words that satisfies
+ * the query -- searching from word index *p onwards.
+ *
+ * prs    parsed text; words[0..curwords-1] are examined
+ * query  the query whose VAL operands are matched against words[].item
+ * p, q   in: *p is the index to start from; out: on success *p and *q
+ *        are the first and last word index of the cover
+ *
+ * Returns true when a cover was found, false when there is none.  On a
+ * false return *p and *q hold intermediate values and must not be used.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /*
+    * Right edge: for every VAL operand take its first occurrence at or
+    * after pos; *q becomes the largest of those indexes.
+    */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   /*
+    * *q still 0 means no operand was found (a sole match at index 0 is
+    * indistinguishable from that and is treated as "not found" too).
+    */
+   if ( *q==0 )
+       return false;
+
+   /*
+    * Left edge: for every VAL operand take its last occurrence in
+    * [pos, *q]; *p becomes the smallest of those indexes.
+    */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       /* verify that the window really satisfies the whole boolean query */
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* not a cover: retry one word further to the right */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+/*
+ * prsd_headline: headline builder of the default parser.
+ *
+ * Arguments:
+ *   0  HLPRSTEXT *  parsed text; its words[] flags (selected, skip,
+ *                   replace, in) and startsel/stopsel are filled in here
+ *   1  text *       option string, or NULL for defaults
+ *   2  QUERYTYPE *  the query whose terms are to be highlighted
+ *
+ * Recognised options (names compared case-insensitively):
+ *   MaxWords (default 35), MinWords (15), ShortWord (3),
+ *   StartSel ("<b>"), StopSel ("</b>").
+ * Raises an error unless 0 < MinWords < MaxWords and ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is trimmed or extended towards the
+ * MinWords..MaxWords range, and the candidate containing the most
+ * distinct query terms and ending on an acceptable token is kept.  When
+ * the text has no cover at all, the first MinWords words are used.
+ *
+ * Returns the same HLPRSTEXT pointer that was passed in.
+ */
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* from opt + start and end tag */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       /* the map is terminated by an entry with a NULL key */
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       /* bestlen is -1 until a candidate exists, so beste is valid whenever it is read here */
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words ; i++) {
+               /* word q itself was already counted by the loop above */
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this candidate when there is none yet, when it holds more
+        * query terms and ends well, or when it ends well and the
+        * current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   /* no cover at all: fall back to the first min_words words of the text */
+   if ( bestlen<0 ) {
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen fragment */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* default highlight markers when the options did not supply any */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom}}}
+well₁♦₁₂
+over₁♦₁₂
+100₂₂♦₁₂
+feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')

+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and

+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+}

diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html

new file mode 100644 (file)

index 0000000..df0faa4


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-ref.html
@@ -0,0 +1,448 @@
+
+
+
+
+tsearch2 reference
+
+
+The tsearch2 Reference
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Reference documents the user types and functions
+of the tsearch2 module for PostgreSQL.
+An introduction to the module is provided
+by the tsearch2 Guide,
+a companion document to this one.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+Vectors and Queries
+
+Vectors and queries both store lexemes,
+but for different purposes.
+A tsvector stores the lexemes
+of the words that are parsed out of a document,
+and can also remember the position of each word.
+A tsquery specifies a boolean condition among lexemes.
+
+Any of the following functions with a configuration argument
+can use either an integer id or textual ts_name
+to select a configuration;
+if the option is omitted, then the current configuration is used.
+For more information on the current configuration,
+read the next section on Configurations.
+
+Vector Operations
+
+
+
+ to_tsvector( [configuration,]

+ document TEXT) RETURNS tsvector
+
+ Parses a document into tokens,
+ reduces the tokens to lexemes,
+ and returns a tsvector which lists the lexemes
+ together with their positions in the document.
+ For the best description of this process,
+ see the section on Parsing and Stemming
+ in the accompanying tsearch2 Guide.
+
+ strip(vector tsvector) RETURNS tsvector
+
+ Return a vector which lists the same lexemes
+ as the given vector,
+ but which lacks any information
+ about where in the document each lexeme appeared.
+ While the returned vector is thus useless for relevance ranking,
+ it will usually be much smaller.
+
+ setweight(vector tsvector, letter) RETURNS tsvector
+
+ This function returns a copy of the input vector
+ in which every location has been labelled
+ with either the letter
+ 'A', 'B', or 'C',
+ or the default label 'D'
+ (which is the default with which new vectors are created,
+ and as such is usually not displayed).
+ These labels are retained when vectors are concatenated,
+ allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+
+ vector1 || vector2
+
+ concat(vector1 tsvector, vector2 tsvector)

+ RETURNS tsvector
+
+ Returns a vector which combines the lexemes and position information
+ in the two vectors given as arguments.
+ Position weight labels (described in the previous paragraph)
+ are retained intact during the concatenation.
+ This has at least two uses.
+ First,
+ if some sections of your document
+ need be parsed with different configurations than others,
+ you can parse them separately
+ and concatenate the resulting vectors into one.
+ Second,
+ you can weight words from some sections of your document
+ more heavily than those from others by:
+ parsing the sections into separate vectors;
+ assigning the vectors different position labels
+ with the setweight() function;
+ concatenating them into a single vector;
+ and then providing a weights argument
+ to the rank() function
+ that assigns different weights to positions with different labels.
+
+ tsvector_size(vector tsvector) RETURNS INT4
+
+ Returns the number of lexemes stored in the vector.
+
+ text::tsvector RETURNS tsvector
+
+ Directly casting text to a tsvector
+ allows you to directly inject lexemes into a vector,
+ with whatever positions and position weights you choose to specify.
+ The text should be formatted
+ like the vector would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Query Operations
+
+
+
+ to_tsquery( [configuration,]

+ querytext text) RETURNS tsquery
+
+ Parses a query,
+ which should be single words separated by the boolean operators
+ “&” and,
+ “|” or,
+ and “!” not,
+ which can be grouped using parentheses.
+ Each word is reduced to a lexeme using the current
+ or specified configuration.
+
+
+ querytree(query tsquery) RETURNS text
+
+ This might return a textual representation of the given query.
+
+ text::tsquery RETURNS tsquery
+
+ Directly casting text to a tsquery
+ allows you to directly inject lexemes into a query,
+ with whatever positions and position weight flags you choose to specify.
+ The text should be formatted
+ like the query would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Configurations
+
+A configuration specifies all of the equipment necessary
+to transform a document into a tsvector:
+the parser that breaks its text into tokens,
+and the dictionaries which then transform each token into a lexeme.
+Every call to to_tsvector() (described above)
+uses a configuration to perform its processing.
+Three configurations come with tsearch2:
+
+
+default — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the simple dictionary for all others.
+default_russian — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the ru_stem Russian Snowball dictionary for all others.
+simple — Processes both words and numbers
+ with the simple dictionary,
+ which neither discards any stop words nor alters them.
+
+
+The tsearch2 module initially chooses your current configuration
+by looking for your current locale in the locale field
+of the pg_ts_cfg table described below.
+You can manipulate the current configuration yourself with these functions:
+
+
+
+ set_curcfg( id INT | ts_name TEXT

+  ) RETURNS VOID
+
+ Set the current configuration used by to_tsvector
+ and to_tsquery.
+
+ show_curcfg() RETURNS INT4
+
+ Returns the integer id of the current configuration.
+
+
+
+Each configuration is defined by a record in the pg_ts_cfg table:
+
+create table pg_ts_cfg (
+   id      int not  null primary key,
+   ts_name     text not null,
+   prs_name    text not null,
+   locale      text
+);
+
+The id and ts_name are unique values
+which identify the configuration;
+the prs_name specifies which parser the configuration uses.
+Once this parser has split document text into tokens,
+the type of each resulting token —
+or, more specifically, the type's lex_alias
+as specified in the parser's lexem_type() table —
+is searched for together with the configuration's ts_name
+in the pg_ts_cfgmap table:
+
+create table pg_ts_cfgmap (
+   ts_name     text not null,
+   lex_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,lex_alias)
+);
+
+Those tokens whose types are not listed are discarded.
+The remaining tokens are assigned integer positions,
+starting with 1 for the first token in the document,
+and turned into lexemes with the help of the dictionaries
+whose names are given in the dict_name array for their type.
+These dictionaries are tried in order,
+stopping either with the first one to return a lexeme for the token,
+or discarding the token if no dictionary returns a lexeme for it.
+
+Parsers
+
+Each parser is defined by a record in the pg_ts_parser table:
+
+create table pg_ts_parser (
+   prs_id      int not null primary key,
+   prs_name    text not null,
+   prs_start   oid not null,
+   prs_getlexem    oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+);
+
+The prs_id and prs_name uniquely identify the parser,
+while prs_comment usually describes its name and version
+for the reference of users.
+The other items identify the low-level functions
+which make the parser operate,
+and are only of interest to someone writing a parser of their own.
+
+The tsearch2 module comes with one parser named default
+which is suitable for parsing most plain text and HTML documents.
+
+Each parser argument below
+must designate a parser with either an integer prs_id
+or a textual prs_name;
+the current parser is used when this argument is omitted.
+
+
+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID
+
+ Selects a current parser
+ which will be used when any of the following functions
+ are called without a parser as an argument.
+
+ CREATE FUNCTION lexem_type(

+  [ parser ]
+  ) RETURNS SETOF lexemtype
+
+ Returns a table which defines and describes
+ each kind of token the parser may produce as output.
+ For each token type the table gives the lexid
+ which the parser will label each token of that type,
+ the alias which names the token type,
+ and a short description descr for the user to read.
+
+ CREATE FUNCTION parse(

+  [ parser, ] document TEXT
+  ) RETURNS SETOF lexemtype
+
+ Parses the given document and returns a series of records,
+ one for each token produced by parsing.
+ Each token includes a lexid giving its type
+ and a lexem which gives its content.
+
+
+Dictionaries
+
+Dictionaries take textual tokens as input,
+usually those produced by a parser,
+and return lexemes which are usually some reduced form of the token.
+Among the dictionaries which come installed with tsearch2 are:
+
+
+simple simply folds uppercase letters to lowercase
+ before returning the word.
+en_stem runs an English Snowball stemmer on each word
+ that attempts to reduce the various forms of a verb or noun
+ to a single recognizable form.
+ru_stem runs a Russian Snowball stemmer on each word.
+
+
+Each dictionary is defined by an entry in the pg_ts_dict table:
+
+CREATE TABLE pg_ts_dict (
+   dict_id     int not null primary key,
+   dict_name   text not null,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lemmatize  oid not null,
+   dict_comment    text
+);
+
+The dict_id and dict_name
+serve as unique identifiers for the dictionary.
+The meaning of the dict_initoption varies among dictionaries,
+but for the built-in Snowball dictionaries
+it specifies a file from which stop words should be read.
+The dict_comment is a human-readable description of the dictionary.
+The other fields are internal function identifiers
+useful only to developers trying to implement their own dictionaries.
+
+The argument named dictionary
+in each of the following functions
+should be either an integer dict_id or a textual dict_name
+identifying which dictionary should be used for the operation;
+if omitted then the current dictionary is used.
+
+
+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID
+
+ Selects a current dictionary for use by functions
+ that do not select a dictionary explicitly.
+
+ CREATE FUNCTION lexize(

+ [ dictionary, ] word text)
+ RETURNS TEXT[]
+
+ Reduces a single word to a lexeme.
+ Note that lexemes are arrays of zero or more strings,
+ since in some languages there might be several base words
+ from which an inflected form could arise.
+
+
+Ranking
+
+Ranking attempts to measure how relevant documents are to particular queries
+by inspecting the number of times each search word appears in the document,
+and whether different search terms occur near each other.
+Note that this information is only available in unstripped vectors —
+ranking functions will only return a useful result
+for a tsvector which still has position information!
+
+Both of these ranking functions
+take an integer normalization option
+that specifies whether a document's length should impact its rank.
+This is often desirable,
+since a hundred-word document with five instances of a search word
+is probably more relevant than a thousand-word document with five instances.
+The option can have the values:
+
+
+0 (the default) ignores document length.
+1 divides the rank by the logarithm of the length.
+2 divides the rank by the length itself.
+
+
+The two ranking functions currently available are:
+
+
+
+ CREATE FUNCTION rank(

+  [ weights float4[], ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS,
+ and offers the ability to weight word instances more heavily
+ depending on how you have classified them.
+ The weights specify how heavily to weight each category of word:
+ 
+ {D-weight, A-weight, B-weight, C-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(

+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipse-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or less.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+


diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b


--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+<i <b> wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | qwe@efd.r
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | teodor@stack.net
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 | <fr>
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 | <we hjwer <werrwe>
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 | <a href="qwe<qwe>">
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 | <i <b>
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+<i <b> wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 'qwe@efd.r':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 'teodor@stack.net':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+<i <b> wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+


diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496


--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+# Template for the Makefile of a generated dictionary module.
+# config.sh copies this file into the new dictionary directory after
+# substituting the CFG_* placeholders and rewriting the (empty) PG_CPPFLAGS
+# assignment with the include path(s) of the tsearch2 sources.
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+PG_CPPFLAGS =
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+
+include $(top_srcdir)/contrib/contrib-global.mk


diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7


--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility helps people create dictionaries for the contrib/tsearch v2
+module. In particular, it has built-in support for Snowball stemmers.
+
+Programming API to tsearch2 dictionaries is described in tsearch v2 
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument of the -p option must be *the same* as the name of
+      the stemming function in stem.c (without the _stem suffix)
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample portuguese words with the stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+        Note that the dictionary and the corresponding entry in the tsearch
+   configuration are now installed, and you may modify them using
+   plain SQL commands, for example to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercases the input word and removes it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please, check Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for additional information about "Gendict tutorial" and dictionaries.
\ No newline at end of file


diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542


--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRL/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+# Settings controlled by the command-line switches; the values below are the
+# defaults used when a switch is absent.
+dictname=
+stemmode=no
+verbose=no
+cfile=
+hfile=
+dir= 
+hasinit=no
+comment=
+prefix=
+
+# Parse the command line; an unrecognised switch prints the usage text.
+while getopts n:c:C:h:d:p:vis opt
+do
+   case "$opt" in
+       v) verbose=yes;;
+       s) stemmode=yes;;
+       i) hasinit=yes;;
+       n) dictname="$OPTARG";;
+       c) cfile="$OPTARG";;
+       h) hfile="$OPTARG";;
+       d) dir="$OPTARG";;
+       C) comment="$OPTARG";;
+       p) prefix="$OPTARG";;
+       \?) usage;;
+   esac
+done
+
+# -n DICTNAME is mandatory.
+[ ${#dictname} -eq 0 ] && usage
+
+# The dictionary name is used in lower case from here on.
+dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'`
+
+# Snowball mode forces an init method and the fixed file names stem.c/stem.h;
+# the function prefix falls back to the dictionary name when -p is not given.
+if [ $stemmode = "yes" ] ; then 
+   [ ${#prefix} -eq 0 ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi 
+
+# Default target directory: dict_<dictname>.
+[ ${#dir}   -eq 0 ] && dir="dict_$dictname"
+
+# Prepare the comment for substitution into sql.IN: the bare word null when
+# no -C was given, otherwise the text wrapped in single quotes.
+if [ ${#comment} -eq 0 ]; then
+   comment=null
+else
+   comment="'$comment'"
+fi
+
+ofile=
+for f in $cfile
+do
+   f=` echo $f | sed 's#c$#o#'`
+   ofile="$ofile $f"
+done
+
+if [ $stemmode = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+if [ $verbose = "yes" ]; then
+   echo Dictname: "'"$dictname"'"
+   echo Snowball stemmer: $stemmode
+   echo Has init method: $hasinit
+   [ $stemmode = "yes" ] && echo Function prefix: $prefix 
+   echo Source files: $cfile
+   echo Header files: $hfile
+   echo Object files: $ofile
+   echo Comment: $comment
+   echo Directory: ../../$dir
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build directory...  '
+if [ ! -d ../../$dir ]; then
+   if ! mkdir ../../$dir ; then 
+       echo "Can't create directory ../../$dir"
+       exit 1
+   fi 
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+else
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+fi
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+   [ $verbose = "yes" ] && echo -n 'Copy source and header files...  '
+   if [ ${#cfile} -ne 0 ] ; then
+       if ! cp $cfile ../../$dir ; then 
+           echo "Cant cp all or one of files: $cfile"
+           exit 1
+       fi
+   fi
+   if [ ${#hfile} -ne 0 ] ; then 
+       if ! cp $hfile ../../$dir ; then 
+               echo "Cant cp all or one of files: $hfile"
+           exit 1
+       fi
+   fi
+   [ $verbose = "yes" ] && echo ok
+fi
+
+
+# Generate subinclude.h, which #includes every header listed in $hfile.
+[ $verbose = "yes" ] && echo -n 'Build sub-include header...  '
+# Create/truncate the file with ':' instead of 'echo -n': an echo that does
+# not understand -n would write the text "-n" into the header.
+: > ../../$dir/subinclude.h
+for i in $hfile
+do
+   echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+if  [ $stemmode = "yes" ] ; then 
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   [ $verbose = "yes" ] && echo -n 'Build dictinonary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else 
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi 
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n "Build README.$dictname...  "
+if  [ $stemmode = "yes" ] ; then
+   echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname
+else
+   echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname
+fi
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+


diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/* State of one Snowball dictionary, created by dinit_CFG_MODNAME(). */
+typedef struct {
+   struct SN_env *z;   /* stemmer environment returned by CFG_PREFIX_create_env() */
+   StopList    stoplist;   /* stop words; filled from the init argument when one is given */
+   int (*stem)(struct SN_env * z); /* stemming function, set to CFG_PREFIX_stem */
+} DictSnowball;
+
+
+/*
+ * dinit_CFG_MODNAME - init method of the generated Snowball dictionary.
+ *
+ * Allocates a zeroed DictSnowball with malloc(), sets lowerstr as the stop
+ * list word operation and, when argument 0 is a non-NULL text value, passes
+ * it to readstoplist() and sortstoplist().  It then creates the stemmer
+ * environment with CFG_PREFIX_create_env() and returns the state as a
+ * pointer Datum.  Reports ERROR "No memory" when either allocation fails.
+ */
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+Datum 
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+       
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       freestoplist(&(d->stoplist));
+       /* d was obtained with malloc(); release it before reporting the
+        * error so the half-built state does not leak */
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+


diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+/*
+ * Template note: lines tagged HASINIT belong to the optional init function.
+ * The gendict script deletes them when no init function is requested;
+ * presumably it strips just the tag otherwise (verify against the script).
+ * The tagged section declares the dictionary state (a stop word list) and
+ * the dinit_CFG_MODNAME() function that allocates it with malloc(), sets
+ * lowerstr as the word normalizer and, when argument 0 is supplied, loads
+ * and sorts the stop list from it.
+ */
+HASINIT typedef struct {
+HASINIT    StopList    stoplist;
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT 
+HASINIT    if ( !d )
+HASINIT        elog(ERROR, "No memory");
+HASINIT    memset(d,0,sizeof(DictExample));
+HASINIT 
+HASINIT    d->stoplist.wordop=lowerstr;
+HASINIT    
+HASINIT    /* Your INIT code */
+HASINIT    
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+HASINIT        text       *in = PG_GETARG_TEXT_P(0);
+HASINIT        readstoplist(in, &(d->stoplist));
+HASINIT        sortstoplist(&(d->stoplist));
+HASINIT        PG_FREE_IF_COPY(in, 0);
+HASINIT    }
+HASINIT 
+HASINIT    PG_RETURN_POINTER(d);
+HASINIT }
+
+/*
+ * Lexize function of the template dictionary.
+ *
+ * Argument 0 is the state built by the init function (read only on the
+ * HASINIT-tagged lines), argument 1 points at the input word and argument 2
+ * is its length.  Returns a palloc'ed, NULL-terminated array of char
+ * pointers: the copied word itself, or an empty array when the HASINIT
+ * variant finds the word empty or present in the stop list.
+ */
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+   char    **res=palloc(sizeof(char*)*2);
+
+   /* Your INIT dictionary code */
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0]=NULL;
+HASINIT    } else 
+       res[0]=txt;
+   /* the result array is always NULL-terminated */
+   res[1]=NULL;
+
+   PG_RETURN_POINTER(res);
+}


diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842


--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+-- Registration script template for a generated dictionary.
+-- Lines carrying a leading tag (init / snowball markers) are selected or
+-- dropped by the gendict script; the CFG_ placeholders are presumably
+-- substituted by the same script (verify there).
+SET search_path = public;
+BEGIN;
+
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+-- Register the dictionary: name, init function (or null), a null column,
+-- lexize function and comment, in that order.
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;


diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/* Key (as GISTTYPE *) of the pos-th GISTENTRY stored in the varlena 'vec'. */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/* Number of set bits in one byte: sums the eight individual bits of 'val'. */
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/*
+ * Input function of the index key type.  Keys are never read from text,
+ * so this unconditionally raises an error.
+ */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+
+   /* only reached if elog() returns */
+   PG_RETURN_DATUM((Datum) 0);
+}
+
+/*
+ * Output function of the index key type.  Keys are never printed, so this
+ * unconditionally raises an error.
+ */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+
+   /* only reached if elog() returns */
+   PG_RETURN_DATUM((Datum) 0);
+}
+
+/*
+ * qsort() comparator ordering int4 values ascending.
+ */
+static int
+compareint(const void *a, const void *b)
+{
+   int4        lhs = *((int4 *) a);
+   int4        rhs = *((int4 *) b);
+
+   if (lhs > rhs)
+       return 1;
+   if (lhs < rhs)
+       return -1;
+   return 0;
+}
+
+/*
+ * Sort the int4 array 'a' of length 'l' in place and squeeze out duplicate
+ * values.  Returns the number of distinct values, which occupy the front of
+ * the array afterwards.
+ *
+ * Arrays with zero or one element are returned unchanged.  The empty case
+ * must be caught here: the compaction loop below would otherwise report one
+ * element for an empty array.
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+   int4       *ptr,
+              *res;
+
+   if (l <= 1)
+       return l;
+
+   ptr = res = a;
+
+   qsort((void *) a, l, sizeof(int4), compareint);
+
+   /* 'res' marks the last kept value, 'ptr' scans the sorted input */
+   while (ptr - a < l)
+       if (*ptr != *res)
+           *(++res) = *ptr++;
+       else
+           ptr++;
+   return res + 1 - a;
+}
+
+/*
+ * Build a signature from an array key: clear 'sign', then set the bit
+ * selected by HASH() for every element stored in 'a'.
+ */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+   int4       *values = GETARR(a);
+   int4        nvalues = ARRNELEM(a);
+   int4        idx;
+
+   MemSet((void *) sign, 0, sizeof(BITVEC));
+
+   for (idx = 0; idx < nvalues; idx++)
+       HASH(sign, values[idx]);
+}
+
+/*
+ * GiST compress method.
+ *
+ * Leaf entries (tsvector values) are turned into an ARRKEY key holding the
+ * sorted, de-duplicated crc32 of every word; if that key is larger than
+ * TOAST_INDEX_TARGET it is replaced by a SIGNKEY signature.  A non-leaf
+ * signature key whose bytes are all 0xff is replaced by the compact
+ * SIGNKEY | ALLISTRUE form.  In every other case the entry is returned
+ * unchanged.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       /* one int4 slot per word of the tsvector */
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       /* sort the hashes and drop duplicates */
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* release the detoasted copy, if one was made */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /* keep the entry as it is unless every signature byte is 0xff */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       /* signature is saturated: store the header-only ALLISTRUE key */
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * GiST decompress method: detoast the key.  When detoasting produced a new
+ * copy, wrap it in a fresh GISTENTRY; otherwise return the entry as given.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+   GISTENTRY  *retval;
+
+   /* key was not toasted: nothing to rebuild */
+   if (key == (GISTTYPE *) DatumGetPointer(entry->key))
+       PG_RETURN_POINTER(entry);
+
+   retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+   gistentryinit(*retval, PointerGetDatum(key),
+                 entry->rel, entry->page,
+                 entry->offset, key->len, FALSE);
+
+   PG_RETURN_POINTER(retval);
+}
+
+/* Half-open range [arrb, arre) of int4 values searched by checkcondition_arr(). */
+typedef struct
+{
+   int4       *arrb;           /* first element */
+   int4       *arre;           /* one past the last element */
+}  CHKVAL;
+
+/*
+ * Binary search: does the array described by 'checkval' (a CHKVAL) contain
+ * val->val?  The array is searched as if sorted in ascending order.
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+   CHKVAL     *range = (CHKVAL *) checkval;
+   int4       *lo = range->arrb;
+   int4       *hi = range->arre;
+
+   /* the candidate, if present, always lies in [lo, hi) */
+   while (lo < hi)
+   {
+       int4       *mid = lo + (hi - lo) / 2;
+
+       if (*mid == val->val)
+           return (true);
+
+       if (*mid < val->val)
+           lo = mid + 1;
+       else
+           hi = mid;
+   }
+
+   return (false);
+}
+
+/*
+ * Signature variant of the condition check: 'checkval' is a signature and
+ * the result is the state of the bit that val->val hashes to.
+ */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+   BITVECP     sign = (BITVECP) checkval;
+
+   return GETBIT(sign, HASHVAL(val->val));
+}
+
+/*
+ * GiST consistent method.
+ *
+ * An empty query matches nothing.  Signature keys are evaluated bit-wise
+ * (an ALLISTRUE key matches everything); array keys are evaluated exactly
+ * by searching the stored values.
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(entry->key);
+   CHKVAL      chkval;
+
+   if (!query->size)
+       PG_RETURN_BOOL(false);
+
+   if (!ISSIGNKEY(key))
+   {
+       /* only leaf pages */
+       chkval.arrb = GETARR(key);
+       chkval.arre = chkval.arrb + ARRNELEM(key);
+       PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                                 (void *) &chkval, true,
+                                 checkcondition_arr));
+   }
+
+   if (ISALLTRUE(key))
+       PG_RETURN_BOOL(true);
+
+   PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                             (void *) GETSIGN(key), false,
+                             checkcondition_bit));
+}
+
+/*
+ * Merge the key 'add' into the signature 'sbase'.
+ *
+ * Returns 1 when 'add' is an ALLISTRUE key (sbase is left untouched in that
+ * case), otherwise 0 after OR-ing in its signature or hashing in its array
+ * values.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+   int4        i;              /* loop variable, also used by LOOPBYTE */
+   BITVECP     sadd;
+   int4       *values;
+
+   if (!ISSIGNKEY(add))
+   {
+       values = GETARR(add);
+       for (i = 0; i < ARRNELEM(add); i++)
+           HASH(sbase, values[i]);
+       return 0;
+   }
+
+   if (ISALLTRUE(add))
+       return 1;
+
+   sadd = GETSIGN(add);
+   LOOPBYTE(
+            sbase[i] |= sadd[i];
+   );
+   return 0;
+}
+
+
+/*
+ * GiST union method: build one signature key covering all entries of
+ * 'entryvec'.  The result is ALLISTRUE as soon as any member is; its size
+ * is also stored through the second argument.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   int        *size = (int *) PG_GETARG_POINTER(1);
+   int4        nentries = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+   int4        flag = 0;
+   int4        keysize;
+   int4        i;
+   BITVEC      base;
+   GISTTYPE   *result;
+
+   MemSet((void *) base, 0, sizeof(BITVEC));
+
+   /* stop collecting at the first ALLISTRUE member */
+   for (i = 0; i < nentries && flag == 0; i++)
+       if (unionkey(base, GETENTRY(entryvec, i)))
+           flag = ALLISTRUE;
+
+   flag |= SIGNKEY;
+   keysize = CALCGTSIZE(flag, 0);
+   result = (GISTTYPE *) palloc(keysize);
+   result->len = keysize;
+   *size = keysize;
+   result->flag = flag;
+
+   /* an ALLISTRUE key carries no signature bytes */
+   if (!ISALLTRUE(result))
+       memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * GiST same method: store in *result (third argument) whether keys a and b
+ * are identical.  Signature keys are equal when both are ALLISTRUE or when
+ * neither is and their signature bytes match; array keys are equal when
+ * they have the same length and the same elements in the same order.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+   GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+   GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+   bool       *result = (bool *) PG_GETARG_POINTER(2);
+
+   if (ISSIGNKEY(a))
+   {                           /* then b also ISSIGNKEY */
+       if (ISALLTRUE(a) && ISALLTRUE(b))
+           *result = true;
+       else if (ISALLTRUE(a))
+           *result = false;
+       else if (ISALLTRUE(b))
+           *result = false;
+       else
+       {
+           int4        i;
+           BITVECP     sa = GETSIGN(a),
+                       sb = GETSIGN(b);
+
+           /* compare the signatures, stopping at the first difference */
+           *result = true;
+           LOOPBYTE(
+                    if (sa[i] != sb[i])
+                    {
+               *result = false;
+               break;
+           }
+           );
+       }
+   }
+   else
+   {                           /* a and b ISARRKEY */
+       int4        lena = ARRNELEM(a),
+                   lenb = ARRNELEM(b);
+
+       if (lena != lenb)
+           *result = false;
+       else
+       {
+           int4       *ptra = GETARR(a),
+                      *ptrb = GETARR(b);
+           int4        i;
+
+           /* element-wise comparison, stopping at the first difference */
+           *result = true;
+           for (i = 0; i < lena; i++)
+               if (ptra[i] != ptrb[i])
+               {
+                   *result = false;
+                   break;
+               }
+       }
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * Count the bits set in a signature.
+ */
+static int4
+sizebitvec(BITVECP sign)
+{
+   int4        nset = 0;
+   int4        i;              /* loop variable used by LOOPBYTE */
+
+   LOOPBYTE(
+       nset += SUMBIT(sign[i]);
+   );
+
+   return nset;
+}
+
+/*
+ * Hamming distance of two signatures: the number of bit positions in
+ * which a and b differ.
+ */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+   int i;                      /* loop variable used by LOOPBIT */
+   int ndiff = 0;
+
+   LOOPBIT(
+       if ( GETBIT(a,i) ^ GETBIT(b,i) )
+           ndiff++;
+   );
+
+   return ndiff;
+}
+
+/*
+ * Hamming distance of two signature keys.  An ALLISTRUE key counts as a
+ * signature with every bit set, so its distance to an ordinary key is the
+ * number of bits that key leaves unset.
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+   if ( !ISALLTRUE(a) && !ISALLTRUE(b) )
+       return hemdistsign( GETSIGN(a), GETSIGN(b) );
+
+   if ( ISALLTRUE(a) && ISALLTRUE(b) )
+       return 0;
+
+   /* exactly one of the keys is ALLISTRUE */
+   if ( ISALLTRUE(a) )
+       return SIGLENBIT-sizebitvec(GETSIGN(b));
+
+   return SIGLENBIT-sizebitvec(GETSIGN(a));
+}
+
+/*
+ * GiST penalty method: store in *penalty (third argument) the cost of
+ * adding 'newentry' below 'origentry'.  The cost is a Hamming distance,
+ * except for an array key added below an ALLISTRUE key, where it is the
+ * fraction of unset bits scaled by 1/(SIGLENBIT+1).
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+   GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+   float      *penalty = (float *) PG_GETARG_POINTER(2);
+   GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+   GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+
+   *penalty = 0.0;
+
+   if (!ISARRKEY(newval))
+   {
+       /* both keys are signatures */
+       *penalty=hemdist(origval,newval);
+   }
+   else
+   {
+       BITVEC      sign;
+
+       /* turn the array key into a signature first */
+       makesign(sign, newval);
+
+       if ( !ISALLTRUE(origval) )
+           *penalty=hemdistsign(sign,GETSIGN(origval));
+       else
+           *penalty=((float)(SIGLENBIT-sizebitvec(sign)))/(float)(SIGLENBIT+1);
+   }
+
+   PG_RETURN_POINTER(penalty);
+}
+
+/* Signature form of one key, as prepared by fillcache() for picksplit. */
+typedef struct
+{
+   bool        allistrue;      /* key was ALLISTRUE; 'sign' is not filled then */
+   BITVEC      sign;           /* signature bits otherwise */
+}  CACHESIGN;
+
+/*
+ * Fill a cache slot from a key: array keys are hashed into a signature,
+ * ALLISTRUE keys only set the flag, ordinary signature keys are copied.
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+   if (ISARRKEY(key))
+   {
+       item->allistrue = false;
+       makesign(item->sign, key);
+       return;
+   }
+
+   if (ISALLTRUE(key))
+   {
+       item->allistrue = true;
+       return;
+   }
+
+   item->allistrue = false;
+   memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+}
+
+/* Balancing term for picksplit: -((a-b)^3) * c, evaluated as double. */
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+/* Sort record used by picksplit: an entry offset and its assignment cost. */
+typedef struct
+{
+   OffsetNumber pos;
+   int4        cost;
+} SPLITCOST;
+
+/*
+ * qsort() comparator ordering SPLITCOST records by ascending cost.
+ */
+static int
+comparecost(const void *a, const void *b)
+{
+   int4        lhs = ((SPLITCOST *) a)->cost;
+   int4        rhs = ((SPLITCOST *) b)->cost;
+
+   if (lhs > rhs)
+       return 1;
+   if (lhs < rhs)
+       return -1;
+   return 0;
+}
+
+
+/*
+ * Hamming distance of two cached signatures.  An allistrue slot counts as
+ * a signature with every bit set.
+ */
+static int
+hemdistcache(CACHESIGN   *a, CACHESIGN   *b) {
+   if ( !a->allistrue && !b->allistrue )
+       return hemdistsign( a->sign, b->sign );
+
+   if ( a->allistrue && b->allistrue )
+       return 0;
+
+   /* exactly one of the slots is allistrue */
+   if ( a->allistrue )
+       return SIGLENBIT-sizebitvec(b->sign);
+
+   return SIGLENBIT-sizebitvec(a->sign);
+}
+
+/*
+ * GiST picksplit method.
+ *
+ * Arguments: the entry vector (entries live at offsets FirstOffsetNumber ..
+ * maxoff+1) and the GIST_SPLITVEC to fill, which is also the return value.
+ *
+ * 1. Every key is converted to a cached signature.
+ * 2. The pair of entries with the largest Hamming distance becomes the two
+ *    seeds (the last entry is not considered as a seed).
+ * 3. Entries are sorted by |distance to seed 1 - distance to seed 2| and
+ *    handed to the left or right side, whichever union signature is closer;
+ *    WISH_F biases the choice by the current difference in side sizes.
+ *
+ * spl_left / spl_right receive the offsets, spl_ldatum / spl_rdatum the
+ * union keys of the two sides.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+   OffsetNumber k,
+               j;
+   GISTTYPE   *datum_l,
+              *datum_r;
+   BITVECP     union_l,
+               union_r;
+   int4        size_alpha,
+               size_beta;
+   int4        size_waste,
+               waste = -1;
+   int4        nbytes;
+   OffsetNumber seed_1 = 0,
+               seed_2 = 0;
+   OffsetNumber *left,
+              *right;
+   OffsetNumber maxoff;
+   BITVECP     ptr;
+   int         i;
+   CACHESIGN  *cache;
+   SPLITCOST  *costvector;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+   cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+   fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+   /*
+    * Cache the last entry right away.  The seed search below never visits
+    * it, but the fallback seeds (1 and 2) may point at it when there are
+    * only two entries, and its slot must not be read uninitialized.
+    */
+   fillcache(&cache[OffsetNumberNext(maxoff)],
+             GETENTRY(entryvec, OffsetNumberNext(maxoff)));
+
+   /* pick as seeds the pair of entries that are farthest apart */
+   for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+       for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+           /* remaining slots are filled lazily during the first outer pass */
+           if (k == FirstOffsetNumber)
+               fillcache(&cache[j], GETENTRY(entryvec, j));
+
+           size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+           if (size_waste > waste) {
+               waste = size_waste;
+               seed_1 = k;
+               seed_2 = j;
+           }
+       }
+   }
+
+   left = v->spl_left;
+   v->spl_nleft = 0;
+   right = v->spl_right;
+   v->spl_nright = 0;
+
+   /* no pair was examined: fall back to the first two entries */
+   if (seed_1 == 0 || seed_2 == 0) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
+   /* form initial .. */
+   if (cache[seed_1].allistrue) {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_l->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_l->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+   }
+   if (cache[seed_2].allistrue) {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_r->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_r->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+   }
+
+   union_l=GETSIGN(datum_l);
+   union_r=GETSIGN(datum_r);
+   /* from here on maxoff includes the last entry (already cached above) */
+   maxoff = OffsetNumberNext(maxoff);
+   /* sort before ... */
+   costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+   for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+       costvector[j - 1].pos = j;
+       size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+       size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+       costvector[j - 1].cost = abs(size_alpha - size_beta);
+   }
+   qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+   for (k = 0; k < maxoff; k++) {
+       j = costvector[k].pos;
+       /* the seeds go to their own side unconditionally */
+       if (j == seed_1) {
+           *left++ = j;
+           v->spl_nleft++;
+           continue;
+       } else if (j == seed_2) {
+           *right++ = j;
+           v->spl_nright++;
+           continue;
+       }
+
+       /*
+        * Distance of entry j to the left union.  cache[j].sign is a bare
+        * signature, not a GISTTYPE, so it must be passed as is: GETSIGN()
+        * would skip GTHDRSIZE bytes and read past the end of the BITVEC.
+        */
+       if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+               size_alpha=0;
+           else
+               size_alpha = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign
+               );
+       } else {
+           size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+       }
+
+       /* same for the right union */
+       if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+               size_beta=0;
+           else
+               size_beta = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign
+               );
+       } else {
+           size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+       }
+
+       if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+           /* add to the left side and widen its union signature */
+           if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_l) )
+                   MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_l[i] |= ptr[i];
+               );
+           }
+           *left++ = j;
+           v->spl_nleft++;
+       } else {
+           /* add to the right side and widen its union signature */
+           if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_r) )
+                   MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_r[i] |= ptr[i];
+               );
+           }
+           *right++ = j;
+           v->spl_nright++;
+       }
+   }
+
+   /* both offset lists are closed with a FirstOffsetNumber marker */
+   *right = *left = FirstOffsetNumber;
+   pfree(costvector);
+   pfree(cache);
+   v->spl_ldatum = PointerGetDatum(datum_l);
+   v->spl_rdatum = PointerGetDatum(datum_r);
+
+   PG_RETURN_POINTER(v);
+}


diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+/* number of bits per signature byte */
+#define BITBYTE 8
+/* signature length counted in int4 words */
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+/* signature length in bytes and in bits */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+/* a signature is a plain array of SIGLEN bytes */
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+#define LOOPBYTE(a) \
+       for(i=0;i
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i
+                               a;\
+               }
+
+/* byte of signature x that holds bit number i */
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+/* bit i (0..7) of the single byte value x */
+#define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
+/* clear / set / read bit number i of signature x */
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+/* NOTE: these evaluate their arguments more than once */
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+/* bit position a hashed value maps to, and the macro that sets that bit */
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ *
+ * A key is a small header (len, flag) followed by a payload that depends
+ * on the flag: an int4 array for ARRKEY, SIGLEN signature bytes for
+ * SIGNKEY, and nothing at all when ALLISTRUE is set (see CALCGTSIZE).
+ */
+typedef struct
+{
+   int4        len;            /* total size of the key in bytes */
+   int4        flag;           /* combination of the *KEY / ALLISTRUE bits */
+   char        data[1];        /* start of the payload */
+}  GISTTYPE;
+
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+/* flag tests; each yields the masked flag value, non-zero when set */
+#define ISARRKEY(x) ( ((GISTTYPE*)x)->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)x)->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)x)->flag & ALLISTRUE )
+
+/* header size, and total key size for a given flag / array length */
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+/* payload accessors: both point just past the header of key x */
+#define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)x+GTHDRSIZE ) )
+/* number of int4 elements stored in array key x, derived from its len */
+#define ARRNELEM(x) ( ( ((GISTTYPE*)x)->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "spell.h"
+
+#define MAXNORMLEN 56
+
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/* qsort() comparator: order SPELL entries by their word, using strcmp(). */
+static int cmpspell(const void *s1,const void *s2){
+   const SPELL *lhs = (const SPELL*)s1;
+   const SPELL *rhs = (const SPELL*)s2;
+
+   return(strcmp(lhs->word,rhs->word));
+}
+
+/* Lower-case a NUL-terminated string in place, byte by byte, via tolower(). */
+static void 
+strlower( char * str ) {
+   unsigned char *cur;
+
+   for ( cur = (unsigned char *)str; *cur; cur++ )
+       *cur = tolower( *cur );
+}
+
+/*
+ * Backward string compare for suffix tree operations: compares s1 and s2
+ * character by character starting from their ends.  When one string is a
+ * suffix of the other, the shorter one sorts first.
+ */
+static int 
+strbcmp(const char *s1, const char *s2) { 
+   int rest1 = strlen(s1), rest2 = strlen(s2);
+
+   /* rest1/rest2 count the characters not yet compared */
+   while (rest1 > 0 && rest2 > 0) {
+       rest1--;
+       rest2--;
+       if (s1[rest1] < s2[rest2]) return -1;
+       if (s1[rest1] > s2[rest2]) return 1;
+   }
+
+   if (rest1 < rest2) return -1;
+   if (rest1 > rest2) return 1;
+   return 0;
+}
+/*
+ * Like strbcmp(), but compares at most 'count' characters from the ends of
+ * the strings; returns 0 as soon as that many characters matched.
+ */
+static int 
+strbncmp(const char *s1, const char *s2, size_t count) { 
+   int rest1 = strlen(s1), rest2 = strlen(s2), budget = count;
+
+   /* rest1/rest2 count the characters not yet compared */
+   while (rest1 > 0 && rest2 > 0 && budget > 0) {
+       rest1--;
+       rest2--;
+       budget--;
+       if (s1[rest1] < s2[rest2]) return -1;
+       if (s1[rest1] > s2[rest2]) return 1;
+   }
+
+   if (budget == 0) return 0;
+   if (rest1 < rest2) return -1;
+   if (rest1 > rest2) return 1;
+   return 0;
+}
+
+/*
+ * qsort() comparator for AFFIX entries: order by type first; within type
+ * 'p' by replacement string from the front, within any other type by
+ * replacement string from the back (strbcmp).
+ */
+static int 
+cmpaffix(const void *s1,const void *s2){
+   const AFFIX *lhs = (const AFFIX*)s1;
+   const AFFIX *rhs = (const AFFIX*)s2;
+
+   if (lhs->type < rhs->type) return -1;
+   if (lhs->type > rhs->type) return 1;
+
+   if (lhs->type != 'p')
+       return(strbcmp(lhs->repl,rhs->repl));
+
+   return(strcmp(lhs->repl,rhs->repl));
+}
+
+/*
+ * Append one dictionary entry (a word and its affix flags) to Conf->Spell.
+ *
+ * The array grows in steps of 1024*20 entries.  The word is duplicated
+ * with strdup(); the flags are copied into the entry's fixed-size flag
+ * buffer and truncated if they do not fit.  Always returns 0; memory
+ * failures are reported with elog(ERROR).
+ */
+int 
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   SPELL *entry;
+
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell"); 
+   }
+   entry = &Conf->Spell[Conf->nspell];
+   entry->word=strdup(word);
+   if ( !entry->word ) 
+       elog(ERROR,"No memory for AddSpell");
+   /*
+    * strncpy() leaves the buffer unterminated when the source fills it,
+    * and the flags are searched with strchr() later on, so always reserve
+    * the last byte for the terminator.
+    */
+   strncpy(entry->flag,flag,sizeof(entry->flag)-1);
+   entry->flag[sizeof(entry->flag)-1]='\0';
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * Load a dictionary file into Conf.
+ *
+ * Each line is "word" or "word/flags".  The flags are the run of ASCII
+ * letters after the slash; the line is lower-cased, cut at the first CR or
+ * LF, and passed to AddSpell().  Returns 1 if the file cannot be opened,
+ * 0 otherwise.
+ */
+int 
+ImportDictionary(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];  
+   FILE *dict;
+
+   if(!(dict=fopen(filename,"r")))return(1);
+   while(fgets(str,sizeof(str),dict)){
+       unsigned char *s;
+       const unsigned char *flag;
+
+           flag = NULL;
+       /* split "word/flags": terminate the word and collect the flag letters */
+       if((s=strchr(str,'/'))){
+           *s=0;
+           s++;flag=s;
+           while(*s){
+               if (((*s>='A')&&(*s<='Z'))||((*s>='a')&&(*s<='z')))
+                   s++;
+               else {
+                   /* first non-letter ends the flag list */
+                   *s=0;
+                   break;
+               }
+           }
+       }else{
+           flag="";
+       }
+       /* lower-cases the word part only; any flags lie past the NUL written above */
+       strlower(str);
+       /* Dont load words if first letter is not required */
+       /* It allows to optimize loading at  search time   */
+       /* NOTE(review): no such filtering is visible here; the loop below only strips CR/LF */
+       s=str;
+       while(*s){
+           if(*s=='\r')*s=0;
+           if(*s=='\n')*s=0;
+           s++;
+       }
+       AddSpell(Conf,str,flag);
+   }
+   fclose(dict);
+   return(0);
+}
+
+
+/*
+ * Look up 'word' in the sorted Conf->Spell array.
+ *
+ * The search is limited to the index range recorded in Conf->SpellTree for
+ * the word's first byte.  With affixflag == 0 any entry with that word
+ * matches; otherwise the entry's flag string must also contain affixflag.
+ * Returns the matching entry or NULL.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int l,c,r,resc,resl,resr, i;
+
+   i = (int)(*word) & 255;
+   l = Conf->SpellTree.Left[i];
+   r = Conf->SpellTree.Right[i];
+   /* no word starts with this byte */
+   if (l == -1) return (NULL);
+   while(l<=r){
+       /* every round probes the middle and both ends of the range */
+       c = (l + r) >> 1;
+       resc = strcmp(Conf->Spell[c].word, word);
+       if( (resc == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[c]);
+       }
+       resl = strcmp(Conf->Spell[l].word, word);
+       if( (resl == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[l]);
+       }
+       resr = strcmp(Conf->Spell[r].word, word);
+       if( (resr == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[r]);
+       }
+       /*
+        * Narrow the range: halve it on the side the middle rules out and
+        * step the other end in by one.  When the middle word is equal but
+        * its flags did not match, only the two ends move (presumably to
+        * scan neighbouring entries with the same word - confirm).
+        */
+       if(resc < 0){
+           l = c + 1;
+           r--;
+       } else if(resc > 0){
+           r = c - 1;
+           l++;
+       } else {
+           l++;
+           r--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * Append one affix rule to Conf->Affix, growing the array 16 entries at a
+ * time.  The mask is stored as a regular expression anchored at the end
+ * ("mask$") for type 's' and at the start ("^mask") for any other type; it
+ * is flagged as not yet compiled.  Always returns 0; memory failures are
+ * reported with elog(ERROR).
+ */
+int 
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   AFFIX *slot;
+
+   if(Conf->naffixes>=Conf->maffixes){
+       if(!Conf->maffixes){
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }else{
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL ) 
+           elog(ERROR,"No memory for AddAffix");
+   }
+
+   slot = &Conf->Affix[Conf->naffixes];
+
+   if (type!='s')
+       sprintf(slot->mask,"^%s",mask);
+   else
+       sprintf(slot->mask,"%s$",mask);
+
+   slot->compile = 1;          /* regex still has to be compiled */
+   slot->flag=flag;
+   slot->type=type;
+
+   strcpy(slot->find,find);
+   strcpy(slot->repl,repl);
+   slot->replen=strlen(repl);
+
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy 'src' to 'dist', dropping every space, '-' and tab character.
+ * The result is NUL-terminated; returns 'dist'.
+ */
+static char * 
+remove_spaces(char *dist,char *src){
+   char *out = dist;
+   char *in;
+
+   for (in = src; *in; in++) {
+       if (*in==' ' || *in=='-' || *in=='\t')
+           continue;
+       *out++ = *in;
+   }
+   *out=0;
+   return(dist);
+}
+
+
+/*
+ * Load an affix file into Conf.
+ *
+ * Lines starting with "suffixes" / "prefixes" switch the section; a line
+ * starting with "flag " sets the flag character for the rules that follow.
+ * Every other line inside a section has '#' comments stripped, is
+ * lower-cased and parsed as "mask > find , repl" (or "mask > repl");
+ * spaces, tabs and '-' are removed from each part before it is passed to
+ * AddAffix().  Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           /*
+            * Skip '*' and blanks in front of the flag character.  The
+            * explicit NUL test is required: strchr() also matches the
+            * string terminator, so without it the scan would run past
+            * the end of a line that has nothing after "flag ".
+            */
+           while(*s && strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       /* rules outside of a section are ignored */
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* str is reused as scratch space now that the line has been parsed */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               /* "mask > repl": the single field is the replacement */
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+       
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+       
+   }
+   fclose(affix);
+       
+   return(0);
+}
+
+/*
+ * Sort Conf->Spell by word and record, for every possible first byte, the
+ * index of the first (Left) and last (Right) entry starting with it.
+ * Left is -1 for bytes that start no word.
+ */
+void 
+SortDictionary(IspellDict * Conf){
+   int     prevLet = -1;
+   size_t  i;
+
+   qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+   for(i = 0; i < 256 ; i++ )
+       Conf->SpellTree.Left[i] = -1;
+
+   for(i = 0; i < Conf->nspell; i++) {
+       int curLet = (int)(*(Conf->Spell[i].word)) & 255;
+
+       /* first entry of a new run of words sharing a first byte */
+       if (curLet != prevLet) {
+           Conf->SpellTree.Left[curLet] = i;
+           prevLet = curLet;
+       }
+       Conf->SpellTree.Right[curLet] = i;
+   }
+}
+
+/*
+ * Sort Conf->Affix with cmpaffix() and build the prefix / suffix index
+ * ranges.  Type 'p' rules are indexed by the first byte of their
+ * replacement string, all others by its last byte (0 when it is empty).
+ * Unused slots are -1.
+ */
+void 
+SortAffixes(IspellDict * Conf) {
+   int     prevPrefix = -1, prevSuffix = -1;
+   size_t  i;
+
+   if (Conf->naffixes > 1)
+       qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
+
+   for(i = 0; i < 256; i++) {
+       Conf->PrefixTree.Left[i] = Conf->PrefixTree.Right[i] = -1;
+       Conf->SuffixTree.Left[i] = Conf->SuffixTree.Right[i] = -1;
+   }
+
+   for(i = 0; i < Conf->naffixes; i++) {
+       AFFIX  *cur = &(((AFFIX*)Conf->Affix)[i]);
+       int     let;
+
+       if(cur->type == 'p') {
+           let = (int)(*(cur->repl)) & 255;
+           if (prevPrefix != let) {
+               Conf->PrefixTree.Left[let] = i;
+               prevPrefix = let;
+           }
+           Conf->PrefixTree.Right[let] = i;
+           continue;
+       }
+
+       let = (cur->replen) ? (int)(cur->repl[cur->replen-1]) & 255 : 0;
+       if (prevSuffix != let) {
+           Conf->SuffixTree.Left[let] = i;
+           prevSuffix = let;
+       }
+       Conf->SuffixTree.Right[let] = i;
+   }
+}
+
+/*
+ * Try to undo one suffix rule on 'word' (of length 'len').
+ *
+ * *res receives the strbncmp() result of comparing the word with
+ * Affix->repl.  When that is 0, the trailing Affix->replen bytes of the word
+ * are replaced by Affix->find; if the result matches the rule's mask and
+ * FindWord() accepts it with the rule's flag, a pstrdup'd copy is returned.
+ * In every other case NULL is returned.
+ */
+static char * 
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) {
+   regmatch_t subs[2]; /* workaround for apache&linux */
+   char newword[2*MAXNORMLEN] = "";
+   size_t stemlen;
+   int err;
+
+   *res = strbncmp(word, Affix->repl, Affix->replen);
+   if (*res != 0)
+       return NULL;
+
+   /*
+    * A word shorter than the suffix would make "len - replen" wrap around,
+    * and an over-long stem plus replacement would overrun newword.
+    */
+   if (len < Affix->replen)
+       return NULL;
+   stemlen = len - Affix->replen;
+   if (stemlen + strlen(Affix->find) >= sizeof(newword))
+       return NULL;
+
+   /* stem of the word followed by the rule's replacement text */
+   memcpy(newword, word, stemlen);
+   strcpy(newword + stemlen, Affix->find);
+
+   if (Affix->compile) {
+       /* mask is compiled lazily, on first use of the rule */
+       err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+       if(err){
+           /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+           regfree(&(Affix->reg));
+           return(NULL);
+       }
+       Affix->compile = 0;
+   }
+   if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+       if(FindWord(Conf, newword, Affix->flag))
+           return pstrdup(newword);
+   }
+   return NULL;
+}
+
+#define NS 1
+#define MAX_NORM 512
+/*
+ * Try to undo one prefix rule on 'word' and append any normal forms found.
+ *
+ * If 'word' does not start with Affix->repl, the strncmp() result is
+ * returned; otherwise 0 is returned (also when the rule's mask fails to
+ * compile).  On a mask match the candidate (Affix->find followed by the
+ * rest of the word) is appended to the list when FindWord() accepts it,
+ * and the single suffix rule at SuffixTree.Left[pi] is then tried on the
+ * candidate.  '*cur' is the write position inside 'forms'; nothing is
+ * appended once MAX_NORM-1 entries are in use.  'len' is not used here.
+ */
+static int 
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur ) {
+  regmatch_t subs[NS*2];
+  char newword[2*MAXNORMLEN] = "";
+  int err, ls, res, lres;
+  size_t newlen;
+  AFFIX *CAffix = Conf->Affix;
+  
+  res = strncmp(word, Affix->repl, Affix->replen);
+  if (res != 0) {
+    return res;
+  }
+  /* candidate = replacement text + word with the prefix stripped */
+  strcpy(newword, Affix->find);
+  strcat(newword, word+Affix->replen);
+
+  /* the mask is compiled lazily; compile is cleared once reg is valid */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return (0);
+    }
+    Affix->compile = 0;
+  }
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    SPELL * curspell;
+
+    if((curspell=FindWord(Conf, newword, Affix->flag))){
+      if ((*cur - forms) < (MAX_NORM-1)) {
+   **cur =  pstrdup(newword);
+   (*cur)++; **cur = NULL;
+      }
+    } 
+    newlen = strlen(newword);
+    /* only the first suffix rule of bucket 'pi' is tried on the candidate */
+    ls = Conf->SuffixTree.Left[pi];
+      if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
+   **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
+   if (**cur) {
+     (*cur)++; **cur = NULL;
+   }
+      }
+  }
+  return 0;
+}
+
+
+/*
+ * Collect the dictionary ("normal") forms of 'word'.
+ *
+ * 'word' is lower-cased in place.  Returns a palloc'd, NULL-terminated
+ * array holding at most MAX_NORM-1 pstrdup'd strings, or NULL when the word
+ * is empty, longer than MAXNORMLEN, or no form was found.
+ */
+char ** 
+NormalizeWord(IspellDict * Conf,char *word){
+   size_t  len;
+   char  **forms;
+   char  **cur;
+   AFFIX  *Affix;
+   int     ri, pi, ipi, lp, rp, cp, ls, rs;
+   int     lres, rres, cres = 0;
+
+   len=strlen(word);
+   /*
+    * An empty word has no last byte: reading word[len-1] would be out of
+    * bounds, and with pi == 0 the "ipi += pi" loop below never terminates.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return(NULL);
+
+   strlower(word);
+
+   /* the array holds char * elements */
+   forms=(char **) palloc(MAX_NORM*sizeof(char *));
+   cur=forms;*cur=NULL;
+
+   ri = (int)(*word) & 255;            /* first byte: prefix bucket */
+   pi = (int)(word[len-1]) & 255;      /* last byte: suffix bucket */
+   Affix=(AFFIX*)Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if(FindWord(Conf, word, 0)){
+       *cur=pstrdup(word);
+       cur++;*cur=NULL;
+   }
+
+   /* Find all other NORMAL forms of the 'word' */
+
+   /* two passes: suffix bucket 0 (empty repl), then bucket pi (pi > 0) */
+   for (ipi = 0; ipi <= pi; ipi += pi) {
+
+       /* check prefix */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp) {
+           cp = (lp + rp) >> 1;
+           cres = 0;
+           if ((cur - forms) < (MAX_NORM-1)) {
+               cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+           }
+           if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+           }
+           if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+           }
+           /* narrow the range using the middle comparison result */
+           if (cres < 0) {
+               rp = cp - 1;
+               lp++;
+           } else if (cres > 0) {
+               lp = cp + 1;
+               rp--;
+           } else {
+               lp++;
+               rp--;
+           }
+       }
+
+       /* check suffix */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       while (ls >= 0 && ls <= rs) {
+           if (  ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           ls++;
+           rs--;
+       } /* end while */
+
+   } /* for ipi */
+
+   if(cur==forms){
+       pfree(forms);
+       return(NULL);
+   }
+   return(forms);
+}
+
+/*
+ * Release everything owned by the dictionary and zero the structure.
+ *
+ * Compiled affix masks (compile == 0) are regfree'd, every stored word is
+ * freed, then the Affix and Spell arrays themselves.
+ */
+void 
+FreeIspell (IspellDict *Conf) {
+   int i;
+   AFFIX *Affix = (AFFIX *)Conf->Affix;
+
+   for (i = 0; i < Conf->naffixes; i++) {
+       if (Affix[i].compile == 0) {
+           regfree(&(Affix[i].reg));
+       }
+   }
+   /* words are counted by nspell; naffixes was used here by mistake */
+   for (i = 0; i < Conf->nspell; i++) {
+       free( Conf->Spell[i].word );
+   }
+   free(Conf->Affix);
+   free(Conf->Spell);
+   memset( (void*)Conf, 0, sizeof(IspellDict) );
+   return;
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include 
+#include 
+
+/* One dictionary entry: a word and the flag string stored with it. */
+typedef struct spell_struct {
+        char * word; 
+        char flag[10];
+} SPELL;
+
+/* One affix rule as stored in IspellDict.Affix. */
+typedef struct aff_struct {   
+        char flag;      /* rule flag, handed to FindWord() with the candidate */
+        char type;      /* 'p' for a prefix rule; other values are treated as suffix */
+        char mask[33];  /* regular expression source, compiled into reg */
+        char find[16];  /* text put in place of repl when the affix is undone */
+        char repl[16];  /* affix text compared against the inflected word */
+        regex_t reg;    /* compiled mask; valid only when compile == 0 */
+        size_t replen;  /* byte count used when comparing with repl */
+        char compile;   /* non-zero: reg has not been compiled yet */
+} AFFIX;
+
+/* Per-byte index ranges into a sorted table; -1 marks an empty bucket. */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+/* An ispell dictionary: affix rules, word list and their lookup ranges. */
+typedef struct {
+   int maffixes;   /* presumably the allocated capacity of Affix - TODO confirm */
+   int naffixes;   /* number of rules in Affix */
+   AFFIX * Affix;
+
+   int nspell;     /* number of entries in Spell */
+   int mspell;     /* presumably the allocated capacity of Spell - TODO confirm */
+   SPELL   *Spell;
+   Tree_struct SpellTree;  /* filled by SortDictionary() */
+   Tree_struct PrefixTree; /* filled by SortAffixes() for type 'p' rules */
+   Tree_struct SuffixTree; /* filled by SortAffixes() for all other rules */
+
+} IspellDict;
+
+char ** NormalizeWord(IspellDict * Conf,char *word);
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#define CS_WAITKEY 0
+#define CS_INKEY   1
+#define CS_WAITEQ  2
+#define CS_WAITVALUE   3
+#define CS_INVALUE 4
+#define CS_IN2VALUE    5
+#define CS_WAITDELIM   6
+#define CS_INESC   7
+#define CS_IN2ESC  8
+
+/*
+ * Return a palloc'd copy of the first 'len' bytes of 'ptr' with backslash
+ * escapes removed: each backslash is dropped and the byte after it is kept
+ * verbatim.  A lone backslash at the very end is dropped.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+   char *res=palloc(len+1), *cptr;
+   memcpy(res,ptr,len);
+   res[len]='\0';
+   /* unescape in place: cptr writes, ptr reads, both inside res */
+   cptr = ptr = res;
+   while(*ptr) {
+       if ( *ptr == '\\' ) {
+           ptr++;
+           /*
+            * Nothing follows the backslash: stop here, otherwise the
+            * read pointer would step past the terminator.
+            */
+           if ( *ptr == '\0' )
+               break;
+       }
+       *cptr=*ptr; ptr++; cptr++;
+   }
+   *cptr='\0';
+
+   return res;
+}
+
+/*
+ * Parse an option string of the form
+ *     key = value , key = "quoted value" , ...
+ * into a palloc'd, zero-filled array of Map entries returned through *m.
+ *
+ * Keys consist of alphabetic characters.  A value is either double-quoted
+ * or runs up to the next whitespace or comma; a backslash escapes the next
+ * character in both forms.  The array has (number of commas + 2) slots, so
+ * at least one all-zero entry follows the parsed pairs.  Syntax errors are
+ * reported with elog(ERROR).
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+   Map *mptr;
+   char *ptr=VARDATA(in), *begin=NULL;
+   int num=0;      /* was char: overflowed on inputs with many commas */
+   int state=CS_WAITKEY;
+
+   /* upper bound for the number of pairs: count the commas */
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if ( *ptr==',' ) num++;
+       ptr++;
+   }
+
+   *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+   memset(mptr, 0, sizeof(Map)*(num+2) );
+   ptr=VARDATA(in);
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       /* ctype functions need an unsigned char value */
+       unsigned char ch = (unsigned char) *ptr;
+
+       if (state==CS_WAITKEY) {
+           if (isalpha(ch)) {
+               begin=ptr;
+               state=CS_INKEY;
+           } else if ( !isspace(ch) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if (state==CS_INKEY) {
+           if ( isspace(ch) ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITEQ;
+           } else if ( *ptr=='=' ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITVALUE;
+           } else if ( !isalpha(ch) ) 
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITEQ ) {
+           if ( *ptr=='=' )
+               state=CS_WAITVALUE;
+           else if ( !isspace(ch) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITVALUE ) {
+           if ( *ptr=='"' ) {
+               begin=ptr+1;
+               state=CS_INVALUE;
+           } else if ( !isspace(ch) ) {
+               begin=ptr;
+               state=CS_IN2VALUE;
+           }
+       } else if ( state==CS_INVALUE ) {
+           if ( *ptr=='"' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_INESC;
+       } else if ( state==CS_IN2VALUE ) {
+           if ( isspace(ch) || *ptr==',' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               /*
+                * Escape inside an unquoted value: must come back to
+                * CS_IN2VALUE, not to the quoted-value state.
+                */
+               state=CS_IN2ESC;
+       } else if ( state==CS_WAITDELIM ) {
+           if ( *ptr==',' ) 
+               state=CS_WAITKEY; 
+           else if ( !isspace(ch) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state == CS_INESC ) {
+           /* escaped character consumed, back to the quoted value */
+           state=CS_INVALUE;
+       } else if ( state == CS_IN2ESC ) {
+           /* escaped character consumed, back to the unquoted value */
+           state=CS_IN2VALUE;
+       } else 
+           elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int) (ptr-VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may legitimately run to the end of the input */
+   if (state==CS_IN2VALUE) {
+       mptr->value = nstrdup(begin, ptr-begin);
+       mptr++;
+   } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) ) 
+       elog(ERROR,"Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery input is parsed with the text parser
+ * and morphology is also applied.
+ * Internal structure:
+ * query tree, then a string with the original values.
+ * The query tree is stored flat: in the array of nodes the
+ * right child is always the next item and the left child is at item+item->left
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR   2
+
+/*
+ * Node of the query tree; also used to hold the polish notation
+ * while parsing (a singly linked list, newest node first).
+ */
+typedef struct NODE
+{
+   int2        weight;     /* weight bit mask as produced by get_weight() */
+   int2        type;       /* token type: VAL, OPR, VALTRUE, ... */
+   int4        val;        /* operator character, or CRC32 of the operand */
+   /*
+    * NOTE(review): distance is int2 here while ITEM.distance in query.h is
+    * a 20-bit field and pushquery() only rejects values >= MAXSTRPOS;
+    * confirm MAXSTRPOS fits in int2.
+    */
+   int2        distance;   /* offset of the operand text in the operand buffer */
+   int2        length;     /* operand length in bytes */
+   struct NODE *next;      /* previously pushed node */
+}  NODE;
+
+/* Working state of the query parser (see queryin()). */
+typedef struct
+{
+   char       *buf;        /* current read position in the query string */
+   int4        state;      /* WAITOPERAND or WAITOPERATOR */
+   int4        count;      /* number of currently open parentheses */
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   int4        lenop;      /* allocated size of op */
+   int4        sumlen;     /* bytes of operand text stored, terminators included */
+   char       *op;         /* buffer of NUL-separated operand strings */
+   char       *curop;      /* write position inside op */
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional ":weights" suffix starting at 'buf'.
+ *
+ * *weight is set to a bit mask (A = 1<<3, B = 1<<2, C = 1<<1, D = 1) of the
+ * letters found, case-insensitively, or to 0 when 'buf' does not start with
+ * ':'.  Returns a pointer to the first character that was not consumed.
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+   *weight = 0;
+
+   if ( *buf != ':' )
+       return buf;
+
+   buf++;
+   while( *buf ) {
+       /* tolower() is only defined for unsigned char values and EOF */
+       switch(tolower((unsigned char) *buf)) {
+           case 'a': *weight |= 1<<3; break; 
+           case 'b': *weight |= 1<<2; break; 
+           case 'c': *weight |= 1<<1; break; 
+           case 'd': *weight |= 1;    break;
+           default: return buf; 
+       }
+       buf++;
+   }
+   
+   return buf;
+}
+
+/*
+ * Get the next token from the query string.
+ *
+ * Returns the token type.  For OPR, *val holds the operator character; for
+ * VAL, *strval / *lenval describe the operand text inside the value
+ * parser's buffer and *weight its weight mask.  Spaces are skipped.
+ * The parser alternates between expecting an operand and an operator.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               if (*(state->buf) == '!')
+               {
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* hand the operand over to the tsvector value parser */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       /* continue after the operand and its optional weights */
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   /* more closing than opening parentheses is an error */
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* end of input is an error while parentheses are open */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       /* reached only for a skipped space */
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Prepend a new node to the parser's list (polish notation, reversed).
+ * Fails with elog(ERROR) when distance or lenval reach their limits.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node;
+
+   node = (NODE *) palloc(sizeof(NODE));
+
+   /* range checks on the operand position and length */
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+
+   node->type = type;
+   node->val = val;
+   node->weight = weight;
+   node->distance = distance;
+   node->length = lenval;
+
+   /* link in as the new head of the list */
+   node->next = state->str;
+   state->str = node;
+   state->num += 1;
+}
+
+/*
+ * Operand callback for tsquery parsing: push the operand unchanged and
+ * append its NUL-terminated text to the operand buffer.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   int4        used;
+
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   /* bytes already stored; this is also the new operand's offset */
+   used = state->curop - state->op;
+
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             used, lenval, weight);
+
+   /* double the buffer until text plus terminator fit */
+   while (used + lenval + 1 >= state->lenop)
+   {
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+   }
+   state->curop = state->op + used;
+
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop[lenval] = '\0';
+   state->curop += lenval + 1;
+   state->sumlen += lenval + 1;
+}
+
+/*
+ * Operand callback for morphological parsing: run the operand through the
+ * text parser of configuration state->cfg_id and push each resulting word,
+ * joined with '&'.  When the parser yields no word at all, a VALTRUE
+ * placeholder is pushed instead.
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT         prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   /* loop header restored: iterate over every word the parser produced */
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       /* from the second word on, AND it with what was pushed before */
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }   
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 ) 
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+#define STACKDEPTH 32
+/*
+ * Make polish notation of the query.
+ *
+ * Reads tokens until END (or a CLOSE for a nested call) and pushes nodes
+ * through pushval / pushquery.  Pending operators are kept on a local
+ * stack of STACKDEPTH entries.  Recurses for every opening parenthesis.
+ * Returns END on success; syntax errors are raised with elog(ERROR).
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* flush pending '&' and '!' now that their operand is known */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* parse the parenthesised sub-expression recursively */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* end of this nesting level: flush all pending operators */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush whatever is still pending */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/* Context handed to checkcondition_str() while a query is executed. */
+typedef struct
+{
+   WordEntry  *arrb;       /* first WordEntry of the tsvector */
+   WordEntry  *arre;       /* one past the last WordEntry */
+   char       *values;     /* string area of the tsvector (STRPTR) */
+   char       *operand;    /* operand text area of the query (GETOPERAND) */
+}  CHKVAL;
+
+/*
+ * Compare a tsvector entry with a query operand: shorter sorts first,
+ * equal lengths are decided by strncmp() on the stored bytes.
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   char       *entrytext;
+   char       *querytext;
+
+   /* different lengths decide the order without looking at the text */
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   entrytext = &(chkval->values[ptr->pos]);
+   querytext = &(chkval->operand[item->distance]);
+
+   return strncmp(entrytext, querytext, item->length);
+}
+
+/*
+ * Check weight info: return true if any position stored for 'val' carries
+ * a weight that is selected in item->weight.
+ *
+ * The position data are read from values + val->pos + SHORTALIGN(val->len):
+ * a uint16 count followed by that many WordEntryPos entries.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       /* expression restored: test the bit for this position's weight */
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false; 
+}
+
+/*
+ * Binary-search the tsvector's entries for the operand 'val'.
+ * On a hit the weight restriction is checked too when the operand has one
+ * and the entry carries positions; otherwise a hit is simply true.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *chk = (CHKVAL *) checkval;
+   WordEntry  *lo = chk->arrb;
+   WordEntry  *hi = chk->arre;
+   WordEntry  *mid;
+   int         cmp;
+
+   /* Loop invariant: lo <= val < hi */
+   while (lo < hi)
+   {
+       mid = lo + (hi - lo) / 2;
+       cmp = ValCompare(chk, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+       {
+           if (val->weight && mid->haspos)
+               return checkclass_str(chk, mid, val);
+           return true;
+       }
+   }
+
+   return (false);
+}
+
+/*
+ * Evaluate the boolean query rooted at 'curitem'.
+ *
+ * VAL items are decided by chkcond().  For '!' the operand is the next
+ * item; with calcnot false a negation is taken as true without looking at
+ * it.  For binary operators the left operand is at curitem + curitem->left
+ * and the right operand is the next item; evaluation short-circuits.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       if (TS_execute(curitem + 1, checkval, calcnot, chkcond))
+           return false;
+       return true;
+   }
+
+   if (curitem->val == (int4) '&')
+   {
+       if (!TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+           return false;
+       return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+   }
+
+   /* |-operator */
+   if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+       return true;
+   return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+}
+
+/*
+ * boolean operations
+ *
+ * rexectsq is exectsq with its two arguments swapped.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(0);
+   Datum       second = PG_GETARG_DATUM(1);
+
+   return DirectFunctionCall2(exectsq, second, first);
+}
+
+/*
+ * Match a tsvector (argument 0) against a query (argument 1).
+ * An empty vector or an empty query never matches.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   bool        result = false;
+
+   if (val->size && query->size)
+   {
+       CHKVAL      chkval;
+
+       chkval.arrb = ARRPTR(val);
+       chkval.arre = chkval.arrb + val->size;
+       chkval.values = STRPTR(val);
+       chkval.operand = GETOPERAND(query);
+
+       result = TS_execute(GETQUERY(query), &chkval, true, checkcondition_str);
+   }
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * Find the left operand of every operator in the polish notation view.
+ *
+ * Walks the item array recursively starting at ptr[*pos], advancing *pos
+ * past the sub-expression it handles.  Values get left = 0, '!' gets
+ * left = 1, and a binary operator gets the distance from itself to the
+ * item that follows its first sub-expression.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+   if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
+   {
+       ptr[*pos].left = 0;
+       (*pos)++;
+   }
+   else if (ptr[*pos].val == (int4) '!')
+   {
+       ptr[*pos].left = 1;
+       (*pos)++;
+       findoprnd(ptr, pos);
+   }
+   else
+   {
+       ITEM       *curitem = &ptr[*pos];
+       int4        tmp = *pos;
+
+       (*pos)++;
+       /* skip the sub-expression stored directly after the operator */
+       findoprnd(ptr, pos);
+       curitem->left = *pos - tmp;
+       findoprnd(ptr, pos);
+   }
+}
+
+
+/*
+ * Parse the query text 'buf' into a palloc'd QUERYTYPE.
+ *
+ * 'pushval' is the operand callback (pushval_asis or pushval_morph) and
+ * 'cfg_id' the configuration id stored in the parser state for it.
+ * Raises elog(ERROR) for an empty query.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   /* the list is newest-first, so copying it front to back reverses the push order */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * Input function without morphology: operands are stored as written
+ * (pushval_asis) and the configuration id passed to queryin() is 0.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0));
+}
+
+/*
+ * out function
+ *
+ * INFIX is the state used by infix() while printing a query.
+ */
+typedef struct
+{
+   ITEM       *curpol;     /* next item of the query to print */
+   char       *buf;        /* output buffer (palloc'd, grown by RESIZEBUF) */
+   char       *cur;        /* write position inside buf */
+   char       *op;         /* operand text area of the query */
+   int4        buflen;     /* allocated size of buf */
+}  INFIX;
+
+/*
+ * Double inf->buf until 'addsize' more bytes plus a terminator fit,
+ * keeping inf->cur at the same offset.
+ */
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * Recursive walk on the tree, printing it in
+ * infix (human-readable) view.
+ *
+ * Prints the sub-expression at in->curpol into in->buf and advances
+ * in->curpol past it.  'first' suppresses the parentheses that are
+ * otherwise put around an '|' expression.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       /* worst case: every byte escaped, two quotes, ':' and four weight letters */
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           /* a quote inside the operand is written with a leading backslash */
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       /* a negated operator expression is wrapped in parentheses */
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm;
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       /* the right operand is stored first; print it into a scratch buffer */
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function: print the query in infix form as a palloc'd C string.
+ * An empty query is printed as the empty string.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+
+   if (query->size == 0)
+   {
+       char       *b = palloc(1);
+
+       *b = '\0';
+       /* release the detoasted copy on this path too */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(b);
+   }
+   nrm.curpol = GETQUERY(query);
+   nrm.buflen = 32;
+   nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+   *(nrm.cur) = '\0';
+   nrm.op = GETOPERAND(query);
+   infix(&nrm, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(nrm.buf);
+}
+
+/*
+ * Debug function, used only to view the query
+ * which will be executed in non-leaf pages of the index.
+ *
+ * Returns a text value: empty for an empty query, "T" when clean_NOT_v2()
+ * returns NULL, otherwise the infix form of the cleaned query.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+   text       *res;
+   ITEM       *q;
+   int4        len;
+
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       PG_RETURN_POINTER(res);
+   }
+
+   q = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (!q)
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+   else
+   {
+       nrm.curpol = q;
+       nrm.buflen = 32;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+       *(nrm.cur) = '\0';
+       nrm.op = GETOPERAND(query);
+       infix(&nrm, true);
+
+       /* copy the printed text without its terminator into the varlena */
+       res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ);
+       VARATT_SIZEP(res) = nrm.cur - nrm.buf + VARHDRSZ;
+       strncpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf);
+       pfree(q);
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * to_tsquery(cfg_id int4, text): parse the text with morphology
+ * (pushval_morph) under the given configuration id, then strip placeholder
+ * items with clean_fakeval_v2().  If nothing is left, an empty query
+ * (size 0) is returned.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text    *in = PG_GETARG_TEXT_P(1);
+   char *str;
+   QUERYTYPE  *query;
+   ITEM       *res;
+   int4        len;
+
+   str=text2char(in);
+   PG_FREE_IF_COPY(in,1);
+
+   query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+   res = clean_fakeval_v2(GETQUERY(query), &len);
+   if (!res)
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+       PG_RETURN_POINTER(query);
+   }
+   /* overwrite the leading items with the cleaned tree; query->size is left as it was */
+   memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM));
+   pfree(res);
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * to_tsquery(cfg_name text, text): resolve the configuration name with
+ * name2id_cfg() and delegate to to_tsquery().
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+
+   /*
+    * name was fetched from argument 0, so that is the slot to compare
+    * against; with slot 1 the pointers never match and name was freed
+    * even when it was not a detoasted copy.
+    */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsquery(text): delegate to to_tsquery() using the configuration id
+ * returned by get_currcfg().
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   PG_RETURN_DATUM( DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( get_currcfg() ),
+       PG_GETARG_DATUM(0)
+   ));
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * One element of a query stored in prefix (Polish) notation, with a
+ * back link from an operator to its left operand.
+ */
+typedef struct ITEM
+{
+   int8        type;       /* item kind; the visible code tests it against VAL, OPR and VALTRUE */
+   int8        weight;     /* NOTE(review): not referenced in the code visible here -- meaning to be confirmed */
+   int2        left;       /* for a binary operator: distance, in items, to its left operand */
+   int4        val;        /* for an operator: its character ('!', '&' or '|') */
+   /* user-friendly value, must correlate with WordEntry */
+   uint32  
+       unused:1,
+       length:11,          /* length of the operand text */
+       distance:20;        /* offset of the operand text from GETOPERAND() */
+}  ITEM;
+
+/*
+ * Storage layout of a query value:
+ * (len)(size)(array of ITEM)(array of operands in user-friendly form)
+ */
+typedef struct
+{
+   int4        len;        /* presumably the total length in bytes; an empty query stores HDRSIZEQT here */
+   int4        size;       /* number of ITEMs in the array */
+   char        data[1];    /* start of the ITEM array, which is followed by the operand text */
+}  QUERYTYPE;
+
+/* Size of the fixed header (len and size) that precedes the ITEM array */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+/* Bytes needed for a query with `size` items and `lenofoperand` bytes of operand text */
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+/* Pointer to the first ITEM of query x; fully parenthesized so it can be used inside any expression */
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+/* Pointer to the operand text, which starts right after the ITEM array */
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+/* True when character x is one of the query syntax characters ! & | ( ) */
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+/*
+ * Token / item kinds. VAL, OPR and VALTRUE are compared with ITEM.type in
+ * the code visible here; NOTE(review): END, ERR, OPEN, CLOSE and VALFALSE
+ * are presumably used by the query parser -- confirm against query.c.
+ */
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+/*
+ * Evaluate the query whose first item is curitem. checkval is passed
+ * through untouched to the chkcond callback, which presumably decides
+ * whether a single value item matches. NOTE(review): the effect of
+ * calcnot is not visible from this header -- confirm in the definition.
+ */
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevance ranking
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+/* Entry points of this file, each registered through PG_FUNCTION_INFO_V1 */
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+/*
+ * Default weights, indexed by the weight field of a WordEntryPos (see
+ * wpos). rank_def() passes them as is; rank() falls back to them for
+ * negative entries of the caller-supplied array.
+ */
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+/* Weight of position entry wep; relies on a float array named w being in scope at the point of use */
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+/* Normalization method used when the caller passes none; method 0 leaves the rank unchanged */
+#define DEF_NORM_METHOD    0
+
+/*
+ * Weight of a collocation of two words that are w positions apart.
+ * Anything further apart than 100 positions gets the token weight 1e-30.
+ */
+static float4 word_distance ( int4 w ) {
+   double  decay;
+
+   if ( w>100 )
+       return 1e-30;
+
+   decay = exp( ((float4)w)/1.5-2 );
+   return 1.0/(1.005+0.05*decay);
+}
+
+/*
+ * Length of the document behind t: the sum of the position counts of all
+ * entries, where an entry without any position counts as one.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry  *entry;
+   WordEntry  *stop = (WordEntry*)STRPTR(t);
+   int         total = 0;
+
+   for( entry=ARRPTR(t); entry < stop; entry++ ) {
+       int npos = POSDATALEN(t, entry);
+
+       total += ( npos == 0 ) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Order a tsvector entry against a query item: shorter text sorts first,
+ * texts of equal length are ordered by strncmp().
+ * eval / qval are the string areas that ptr->pos / item->distance index.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary search for the entry of t whose text equals the operand of item.
+ * Returns the entry, or NULL if t does not contain the word.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+   WordEntry  *lo = ARRPTR(t);
+   WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+   /* the match, if any, always lies in the half-open range [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+           return mid;
+   }
+
+   return NULL;
+}
+/*
+ * Stand-in position list for words stored without position data.
+ * Before use, callers overwrite the start of element 0 with a uint16
+ * count (lengthof(POSNULL)-1), so that the array has the same
+ * "count, then positions" shape as real position data; element 1 is the
+ * single fake position.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank used when the top-level operator of the query is '&'.
+ *
+ * Every pair of query values found in t contributes, for every pair of
+ * their positions, sqrt(weight1 * weight2 * word_distance(distance));
+ * contributions are accumulated as 1 - (1 - res) * (1 - contribution).
+ * Words without position data use POSNULL.
+ *
+ * Returns -1.0 when no pair contributed anything.
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   memset(pos,0,sizeof(uint16*) * q->size);
+   /* the start of POSNULL[0] is overlaid with the uint16 element count */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+       /* a position list is a uint16 count followed by the WordEntryPos array */
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       /* pair word i with every earlier query word that was found */
+       for( k=0; k<i; k++ ) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   /* distance 0 only counts when one side has no real positions */
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw;
+                       if ( !dist ) dist=MAXENTRYPOS;
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res;
+}
+
+/*
+ * Rank used for every query whose top-level operator is not '&'.
+ *
+ * Each position of each query value found in t contributes its weight;
+ * contributions are accumulated as 1 - (1 - res) * (1 - weight).
+ * Words without position data use POSNULL.
+ *
+ * Returns -1.0 when none of the query values occurs in t.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* the start of POSNULL[0] is overlaid with the uint16 element count */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Rank document t against query q with the weights in w.
+ *
+ * Chooses calc_rank_and() when the first query item is the '&' operator
+ * and calc_rank_or() otherwise; a negative result of either is replaced
+ * by 1e-20. The rank is then normalized according to method:
+ *   0 - no normalization
+ *   1 - divide by the logarithm of the document length
+ *   2 - divide by the document length
+ * Any other method raises an error. Returns 0.0 for an empty document
+ * or an empty query.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   if ( res < 0 )
+       res = 1e-20;
+
+   switch(method) {
+       case 0: break;
+       case 1: {
+           int doclen = cnt_length(t);
+
+           /* log(1) is 0: leave the rank alone instead of dividing by zero */
+           if ( doclen > 1 )
+               res /= log((float)doclen);
+           break;
+       }
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   return res;
+}
+
+/*
+ * rank(weights, tsvector, query [, method])
+ *
+ * Argument 0 is a one-dimensional array of at least lengthof(weights)
+ * float4 values; a negative entry selects the built-in default for that
+ * slot, and an entry above 1.0 is rejected. Argument 3, when present,
+ * is the normalization method handed to calc_rank().
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 )
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+       elog(ERROR,"Array of weight is too short");
+
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 )
+           elog(ERROR,"Weight out of range");
+   }
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method);
+
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank(tsvector, query [, method]) with the built-in default weights.
+ * Argument 2, when present, is the normalization method.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int         method = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   float       res;
+
+   res = calc_rank(weights, txt, query, method);
+
+   PG_FREE_IF_COPY(txt, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_FLOAT4(res);
+}
+
+/* One occurrence of a query value in the document */
+typedef struct {
+   ITEM    *item;  /* the query item whose word occurs here */
+   int32   pos;    /* position of that occurrence in the document */
+} DocRepresentation;
+
+/*
+ * qsort() comparator ordering DocRepresentation entries by position.
+ * Equal positions compare as equal, as the qsort() contract requires.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+/* Slice of a DocRepresentation array, handed to checkcondition_DR() as its checkval */
+typedef struct {
+   DocRepresentation *doc; /* first entry of the slice */
+   int len;                /* number of entries in the slice */
+}  ChkDocR;
+
+/*
+ * Callback for TS_execute(): true when query item val occurs in the
+ * document slice described by checkval (a ChkDocR).
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *slice = (ChkDocR*)checkval;
+   int i;
+
+   for( i=0; i < slice->len; i++ ) {
+       if ( slice->doc[i].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next cover: a span of the document that satisfies the query.
+ *
+ * doc/len is the position-sorted array built by get_docrep(). *pos is the
+ * index in doc where the search starts and is advanced past the start of
+ * the cover that was found. On success *p and *q hold the first and last
+ * word position of the cover. *q also carries the end of the previous
+ * cover in, so that the same span is not reported twice.
+ *
+ * Returns false when no further cover exists.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* forward pass: *q becomes the largest "first occurrence at or after *pos" over all query values */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               }
+               break;
+           }
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   }
+
+   /* backward pass: *p becomes the smallest "last occurrence at or before lastpos" over all query values */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+
+   if ( *p<=*q ) {
+       /* verify the candidate span f .. doc+lastpos against the whole query */
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q);
+   }
+
+   return false;
+}
+
+/*
+ * Build the document representation used by Cover(): one entry per
+ * occurrence of a query value in txt, sorted by position. Words stored
+ * without position data contribute the single POSNULL position.
+ *
+ * *doclen receives the number of entries. Returns a palloc'd array, or
+ * NULL (with *doclen == 0) when no query value occurs in txt.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   /* the start of POSNULL[0] is overlaid with the uint16 element count */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until the positions of this word fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+
+   if ( cur>0 ) {
+       if ( cur>1 )
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd(K, tsvector, query [, method])
+ *
+ * Cover-based rank: every cover found by Cover() adds 1.0 when it spans
+ * at most K positions and K / span otherwise. A non-positive K selects
+ * the default of 4. The sum is normalized like in calc_rank():
+ *   0 - none, 1 - by log(document length), 2 - by document length.
+ * Returns 0.0 when no query value occurs in the document.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len,cur;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;
+   while( Cover(doc, len, query, &cur, &p, &q) )
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   switch(method) {
+       case 0: break;
+       case 1: {
+           int nwords = cnt_length(txt);
+
+           /* log(1) is 0: leave the rank alone instead of dividing by zero */
+           if ( nwords > 1 )
+               res /= log((float)nwords);
+           break;
+       }
+       case 2: res /= (float)cnt_length(txt); break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd(tsvector, query [, method]): calls rank_cd() with K = -1, which
+ * rank_cd() replaces by its default, and with DEF_NORM_METHOD when no
+ * method argument was given.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum   method = ( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD);
+   Datum   result = DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   );
+
+   PG_RETURN_DATUM(result);
+}
+
+/**************debug*************/
+/* One word occurrence of the document, as printed by get_covers() */
+typedef struct {
+   char    *w;     /* word text inside the tsvector string area (not NUL-terminated: len gives its size) */
+   int2    len;    /* length of the word text */
+   int2    pos;    /* position of this occurrence */
+   int2    start;  /* number of the cover that begins at this word, 0 if none */
+   int2    finish; /* number of the cover that ends at this word, 0 if none */
+} DocWord;
+
+/*
+ * qsort() comparator ordering DocWord entries by position.
+ * Equal positions compare as equal, as the qsort() contract requires.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * Debugging aid: print the words of the document in position order with
+ * the covers found by Cover() marked up as "{N " in front of the first
+ * word of cover N and "}N " behind its last word.
+ *
+ * Every entry of txt must carry position data, otherwise an error is
+ * raised. Returns empty text when no query value occurs in txt.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences of the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size (word plus a space) */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;
+           dw[cur].len=pptr[i].len;
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* the bound is tested first so that dwptr is never read past the array */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++;
+   }
+
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) {
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+/* Node of the in-memory binary tree that a flat query is expanded into */
+typedef struct NODE
+{
+   struct NODE *left;      /* left operand; stays NULL for values and for '!' */
+   struct NODE *right;     /* right operand (the only operand of '!'); NULL for values */
+   ITEM       *valnode;    /* the query item this node stands for (not owned by the node) */
+}  NODE;
+
+/*
+ * Expand the flat (prefix notation) query starting at item `in` into a
+ * tree of palloc'd NODEs. The right operand of an operator is the item
+ * that follows it; the left operand of a binary operator is found through
+ * the item's `left` offset. '!' has a right operand only.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *result = (NODE *) palloc(sizeof(NODE));
+
+   result->valnode = in;
+   result->left = NULL;
+   result->right = NULL;
+
+   /* anything that is not an operator is a leaf */
+   if (in->type != OPR)
+       return result;
+
+   result->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       result->left = maketree(in + in->left);
+
+   return result;
+}
+/* Growing output buffer used while flattening a tree back into an ITEM array */
+typedef struct
+{
+   ITEM       *ptr;    /* the output array (may be moved by repalloc) */
+   int4        len;    /* allocated capacity, in items */
+   int4        cur;    /* number of items written so far */
+}  PLAINTREE;
+/*
+ * Append the subtree rooted at node to state->ptr in prefix order
+ * (operator, right subtree, left subtree), recomputing the `left`
+ * offsets on the way. Every visited node is pfree'd.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   if (state->cur == state->len)   /* output array is full: double it */
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+   memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM));
+   if (node->valnode->type == VAL)
+       state->cur++;
+   else if (node->valnode->val == (int4) '!')
+   {
+       state->ptr[state->cur].left = 1;    /* the operand of '!' follows immediately */
+       state->cur++;
+       plainnode(state, node->right);
+   }
+   else
+   {
+       int4        cur = state->cur;   /* index, not pointer: state->ptr may move while recursing */
+
+       state->cur++;
+       plainnode(state, node->right);
+       state->ptr[cur].left = state->cur - cur;    /* the left operand starts after the right subtree */
+       plainnode(state, node->left);
+   }
+   pfree(node);
+}
+
+/*
+ * Flatten a tree back into a palloc'd ITEM array in prefix notation.
+ * *len receives the number of items. Returns NULL (and *len == 0) when
+ * root is NULL or is neither a value nor an operator. The tree nodes are
+ * released by plainnode() while it is being flattened.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   out;
+
+   out.ptr = NULL;
+   out.cur = 0;
+   out.len = 16;
+
+   if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       out.ptr = (ITEM *) palloc(out.len * sizeof(ITEM));
+       plainnode(&out, root);
+   }
+
+   *len = out.cur;
+   return out.ptr;
+}
+
+/*
+ * Release a tree and all of its descendants. A NULL node is accepted.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive call itself ignores NULL children */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Remove every '!' operator from the tree, treating a negated subtree as
+ * "always TRUE". The original note says this view is useful for
+ * debugging and is otherwise the form used when searching an index.
+ *
+ * Returns the cleaned tree, or NULL when the whole subtree reduces to
+ * "always TRUE". Nodes that are dropped are freed here.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL) /* a value stays as it is */
+       return node;
+
+   if (node->valnode->val == (int4) '!')   /* a negation is dropped together with its operand */
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* '|' with an always-TRUE side is always TRUE itself */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       NODE       *res = node;
+
+       /* '&': an always-TRUE side is dropped and the other side replaces the operator */
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a palloc'd copy of the query starting at ptr with all '!'
+ * operators removed (see clean_NOT_intree). *len receives the number of
+ * items; the result is NULL when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *cleaned = clean_NOT_intree(maketree(ptr));
+
+   return plaintree(cleaned, len);
+}
+/* Constant truth value that clean_fakeval_intree() reports for a removed subtree */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Remove from the tree the values that are always present in a text
+ * (stop words, stored as VALTRUE items) and simplify the operators
+ * around them.
+ *
+ * Returns the cleaned subtree. When the subtree collapses to a constant,
+ * NULL is returned and *result is set to V_TRUE or V_FALSE; otherwise
+ * *result is left untouched. Nodes that are dropped are freed here.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)    /* stop word: constant TRUE */
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)   /* constant operand: the negation is the opposite constant */
+       {
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE) /* TRUE | x  is TRUE */
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)    /* FALSE | x  is x */
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)   /* FALSE & x  is FALSE */
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE) /* TRUE & x  is x */
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a palloc'd copy of the query starting at ptr with the stop-word
+ * placeholders removed (see clean_fakeval_intree). *len receives the
+ * number of items.
+ *
+ * When the whole query collapses to a constant, a NOTICE is emitted,
+ * *len is set to 0 and NULL is returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &verdict);
+
+   if (verdict == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+/*
+ * Query rewriting entry points. Both take the first item of a query in
+ * prefix notation, store the number of resulting items in *len and
+ * return a newly allocated item array, or NULL if nothing is left.
+ */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/* qsort()/bsearch() comparator: orders map entries by key with strcmp() */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *lhs = (const SNMapEntry*)a;
+   const SNMapEntry *rhs = (const SNMapEntry*)b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add the pair (key, value) to the map. The key is copied with strdup(),
+ * the entry array is grown with realloc() (16 entries at first, doubled
+ * afterwards) and re-sorted after every insertion so that findSNMap()
+ * can use bsearch(). Raises an error when memory runs out.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if (map->len>=map->reallen) {
+       int newcap = (map->reallen) ? 2*map->reallen : 16;
+       SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newcap);
+
+       if ( !grown )
+           elog(ERROR, "No memory");
+       map->list=grown;
+       map->reallen=newcap;
+   }
+
+   slot = &( map->list[ map->len ] );
+   slot->key = strdup(key);
+   if ( ! slot->key )
+       elog(ERROR, "No memory");
+   slot->value=value;
+   map->len++;
+
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/* Same as addSNMap(), with the key given as a text value */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *cstr = text2char( key );
+
+   addSNMap(map, cstr, value);
+   /* addSNMap() keeps its own copy of the key */
+   pfree(cstr);
+}
+
+/*
+ * Look key up in the map. Returns the stored value, or 0 when the map is
+ * empty or the key is not present.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry  probe;
+   SNMapEntry *hit;
+
+   if ( !map->list || map->len==0 )
+       return 0;
+
+   probe.key = key;
+   probe.value = 0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( hit == NULL )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Same as findSNMap(), with the key given as a text value.
+ * Returns 0 when the key is not present.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* same type as the stored value, so it is never squeezed through a signed int */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release all keys and the entry array of the map, then zero the map
+ * structure so that it can be reused.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *cur;
+
+       for( cur=map->list; map->len; cur++, map->len-- ) {
+           if ( cur->key )
+               free(cur->key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+/* One key/value pair of the map */
+typedef struct {
+   char    *key;   /* copy of the key made with strdup() by addSNMap() */
+   Oid value;      /* value stored for the key */
+} SNMapEntry;
+/* Map from string to Oid, kept as an array sorted by key */
+typedef struct {
+   int len;            /* number of entries in use */
+   int reallen;        /* number of entries allocated */
+   SNMapEntry  *list;  /* entry array, allocated with realloc(); NULL while the map is empty */
+} SNMap;
+/*
+ * Map operations. The add functions copy the key; the find functions
+ * return 0 when the key is not present; freeSNMap() releases all entries
+ * and zeroes the map. The _t variants take the key as a text value.
+ */
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a stemmer environment with S_size entries in S (each created
+ * with create_s()), I_size integers in I and B_size symbols in B.
+ *
+ * Returns NULL when one of the calloc() calls fails; whatever had been
+ * allocated up to that point is released through SN_close_env().
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto fail;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto fail;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto fail;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+fail:
+    /* each *_size field is set only after its allocation succeeded, so
+       SN_close_env() frees exactly what exists */
+    SN_close_env(z);
+    return NULL;
+}
+
+/*
+ * Release an environment created by SN_create_env(): the strings in S,
+ * the arrays S, I and B, the string p and the structure itself.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    int i;
+
+    if (z->S_size)
+    {
+        for (i = 0; i < z->S_size; i++)
+            lose_s(z->S[i]);
+        free(z->S);
+    }
+    if (z->I_size)
+        free(z->I);
+    if (z->B_size)
+        free(z->B);
+    if (z->p)
+        lose_s(z->p);
+    free(z);
+}
+/*
+ * Make the size symbols at s the current string of the environment: the
+ * range 0 .. z->l is replaced through replace_s() and z->c is reset to 0.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;   /* character unit of all stemmer strings */
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+/*
+ * Stemmer environment, created by SN_create_env() and released by
+ * SN_close_env().
+ */
+struct SN_env {
+    symbol * p;     /* current string, created with create_s() */
+    int c; int a; int l; int lb; int bra; int ket;  /* NOTE(review): stemmer state; SN_set_current() replaces the range 0..l of p and resets c to 0 -- exact meaning of each field to be confirmed */
+    int S_size; int I_size; int B_size;     /* number of elements allocated in S, I and B */
+    symbol * * S;   /* S_size strings, each created with create_s() */
+    int * I;        /* I_size integers, zero-initialized */
+    symbol * B;     /* B_size symbols, zero-initialized */
+};
+/* Lifecycle of an environment, and loading of the string to work on */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/* Prelude: mark certain 'y' characters by rewriting them as 'Y'.
+
+   Clears B[0] (Y_found). If eq_s matches 'y' at the cursor and the
+   following in_grouping test against g_v succeeds, the 'y' slice is
+   replaced by 'Y'. Then, repeatedly scanning forward, every position where
+   the in_grouping test succeeds and is followed by 'y' has that 'y'
+   replaced by 'Y'. B[0] is set whenever a replacement is made.
+   The cursor is restored before returning; the result is always 1.
+   NOTE(review): g_v presumably encodes the vowels a e i o u y over the
+   range 'a'..'y' (97..121) -- confirm against in_grouping. */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2; /* ran off the end: stop repeating */
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1: /* never the target of a goto in this function (generator artifact) */
+        z->c = c;
+    }
+    return 1;
+}
+
+/* Compute the two region marks I[0] (p1) and I[1] (p2).
+
+   Both marks start at the limit z->l. The cursor is then advanced either
+   past a match in table a_0 (the single entry "gener") or, failing that,
+   past the first position accepted by in_grouping(g_v) and then past the
+   first position accepted by out_grouping(g_v); I[0] is set there. The
+   same in/out scan is repeated once more to set I[1]. If any scan reaches
+   z->l first, the remaining marks keep the value z->l.
+   The cursor is restored before returning; the result is always 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+/* Backward test used by Step_1b and Step_5 (the "shortv" condition).
+
+   Reading backwards from the cursor, succeeds (returns 1) when either
+     - out_grouping_b(g_v_WXY), in_grouping_b(g_v), out_grouping_b(g_v)
+       all succeed in that order, or
+     - out_grouping_b(g_v) and in_grouping_b(g_v) succeed and the cursor
+       has then reached the backward limit z->lb.
+   Returns 0 otherwise. The cursor is left wherever the tests moved it;
+   callers restore it themselves. */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m; /* rewind and try the second alternative */
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* R1 test: 1 when the cursor is at or beyond mark I[0] (p1), else 0. */
+static int r_R1(struct SN_env * z) {
+    return z->I[0] <= z->c;
+}
+
+/* R2 test: 1 when the cursor is at or beyond mark I[1] (p2), else 0. */
+static int r_R2(struct SN_env * z) {
+    return z->I[1] <= z->c;
+}
+
+/* Step 1a: handle the suffixes listed in table a_1
+   (ied, s, ies, sses, ss, us), matched backwards from the cursor.
+
+   Result 1 (sses): replace the suffix with "ss".
+   Result 2 (ied, ies): replace with "ie" when exactly one symbol precedes
+            the suffix, otherwise with "i".
+   Result 3 (s): step back one symbol, then scan backwards for a position
+            accepted by in_grouping_b(g_v); delete the suffix if one is
+            found, otherwise fail.
+   Any other non-zero result (ss, us): leave the word unchanged.
+   Returns 0 when no suffix matches or a condition fails, else 1. */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+/* Step 1b: handle the suffixes listed in table a_3
+   (ed, eed, ing, edly, eedly, ingly), matched backwards from the cursor.
+
+   Result 1 (eed, eedly): if the R1 test passes, replace with "ee".
+   Result 2 (the others): require that scanning backwards finds a position
+            accepted by in_grouping_b(g_v), delete the suffix, then look up
+            the new ending in table a_2:
+              1 (at, bl, iz)      insert "e" at the cursor;
+              2 (doubled letter)  delete the last symbol;
+              3 (empty match)     insert "e" only when the cursor is at
+                                  mark I[0] and r_shortv succeeds.
+   Returns 0 when no suffix matches or a condition fails, else 1. */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/* Step 1c: turn a final 'y' or 'Y' into 'i'.
+
+   Matches 'y' (or, failing that, 'Y') backwards from the cursor, then
+   requires out_grouping_b(g_v) to succeed on what precedes it and the
+   cursor not to have reached the backward limit z->lb afterwards.
+   On success the matched symbol is replaced by "i" and 1 is returned;
+   otherwise 0 is returned and nothing is changed. */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0; /* at the limit: the negated test fails */
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/* Step 2: rewrite the longer suffixes listed in table a_4.
+
+   The suffix is matched backwards from the cursor and must pass the R1
+   test. Results 1..12, 14 and 15 replace the suffix with a fixed shorter
+   string (for example result 1 writes "tion" and result 6 writes "ize").
+   Result 13 (ogi) writes "og" only when eq_s_b finds an 'l' before the
+   suffix. Result 16 (li) deletes the suffix only when in_grouping_b
+   accepts the preceding symbol against g_valid_LI.
+   Returns 0 when no suffix matches or a condition fails, else 1. */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+/* Step 3: rewrite or drop the suffixes listed in table a_5.
+
+   The suffix is matched backwards from the cursor and must pass the R1
+   test. Results 1..4 replace it with "tion", "ate", "al" or "ic"
+   respectively; result 5 (ful, ness) deletes it; result 6 (ative) deletes
+   it only when the R2 test also passes.
+   Returns 0 when no suffix matches or a condition fails, else 1. */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+
+/* Step 4: delete the suffixes listed in table a_6.
+
+   The suffix is matched backwards from the cursor and must pass the R2
+   test. Result 1 deletes it outright; result 2 (ion) deletes it only when
+   eq_s_b finds an 's' or a 't' immediately before it.
+   Returns 0 when no suffix matches or a condition fails, else 1. */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+/* Step 5: drop a final 'e' or 'l' (table a_7).
+
+   Result 1 (e): delete when the R2 test passes, or when the R1 test
+            passes and r_shortv fails for what precedes the 'e'.
+   Result 2 (l): delete when the R2 test passes and eq_s_b finds another
+            'l' immediately before it.
+   Returns 0 when nothing matches or a condition fails, else 1. */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0; /* shortv held, so the negated test fails */
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/* Second exception list, checked in backward mode after Step_1a.
+
+   Succeeds (returns 1) when find_among_b matches an entry of table a_8
+   (succeed, proceed, exceed, canning, inning, earring, herring, outing)
+   and the match reaches the backward limit z->lb, i.e. nothing is left in
+   front of it. The word itself is not modified. Returns 0 otherwise. */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (!(find_among_b(z, a_8, 8))) return 0; /* substring, line 147 */
+    z->bra = z->c; /* ], line 147 */
+    if (z->c > z->lb) return 0; /* atlimit, line 147 */
+    return 1;
+}
+
+/* First exception list, checked in forward mode before any other step.
+
+   find_among matches an entry of table a_9 starting at the cursor; the
+   match must extend to the limit z->l, so nothing may follow it. Results
+   1..11 replace the matched text with a fixed stem (for example result 1
+   writes "ski" and result 3 writes "die"). Entries whose result is -1
+   (andes, atlas, bias, cosmos, howe, news, sky) are left unchanged.
+   Returns 1 on any match and 0 otherwise. */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+
+/* Postlude: undo the 'Y' marking done by r_prelude.
+
+   Returns 0 at once when B[0] (Y_found) is clear. Otherwise scans forward
+   from the cursor and replaces every 'Y' it finds with 'y', stopping when
+   the scan reaches the limit z->l, and returns 1. */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0; /* no further 'Y': stop repeating */
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+/* Entry point of the English stemmer: stems the word held in z in place.
+
+   Order of work:
+     1. r_exception1; if it succeeds the word is done.
+     2. Otherwise a "hop 3" test: returns 0 immediately, leaving the word
+        untouched, when fewer than 3 symbols lie between the cursor and
+        the limit z->l.
+     3. r_prelude and r_mark_regions, each with the cursor restored after.
+     4. Switch to backward processing (lb = c, c = l) and run r_Step_1a.
+     5. Unless r_exception2 succeeds, run Step_1b, 1c, 2, 3, 4 and 5.
+        A step that fails is simply skipped; the cursor is reset to the
+        limit before the next one.
+     6. Move the cursor to z->lb and run r_postlude.
+   Returns 1 in every case except the hop failure in step 2. */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/* Allocate an environment sized for the English stemmer: no string slots,
+   two integer slots (the marks I[0] and I[1]) and one flag slot (B[0]). */
+extern struct SN_env * english_create_env(void)
+{
+    return SN_create_env(0, 2, 1);
+}
+
+/* Release an environment obtained from english_create_env. */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Size in bytes of the hidden header stored in front of every symbol
+   string: two ints, read through CAPACITY (index -2) and SIZE (index -1).
+   Parenthesized so the macro stays a single operand next to /, % or a
+   cast. */
+#define HEAD (2*sizeof(int))
+
+/* Accessors for the header of string p; SIZE and CAPACITY are lvalues.
+   SET_SIZE is fully parenthesized so that both its argument and the
+   assignment as a whole are safe inside larger expressions. */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    ((int *)(p))[-2]
+
+/* One entry of a lookup table passed to find_among / find_among_b. */
+struct among
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    /* Optional routine attached to the entry. NOTE(review): presumably an
+       extra condition run by find_among when the entry matches -- confirm
+       against the runtime sources. */
+    int (* function)(struct SN_env *);
+};
+
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+/* Entry point of the stemmer, followed by forward declarations of the
+ * file-local routines it is built from (all defined further down). */
+extern int russian_stem(struct SN_env * z);
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+/* Creation and disposal of the SN_env working environment (end of file). */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+/* Suffix table searched by r_perfective_gerund(). Each s_0_N holds the
+ * character codes of one suffix (presumably KOI8-R - TODO confirm). Each a_0
+ * entry gives the string length, the string, substring_i (index of the entry
+ * tried next if this one is rejected, -1 for none), the result value handed
+ * back by find_among_b(), and an optional function (0 = none). */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+/* Suffix table searched by r_adjective(); every entry has result 1, which
+ * r_adjective() answers by deleting the matched suffix. */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+/* Suffix table searched by r_adjectival() after r_adjective() has succeeded.
+ * Result 1 entries are deleted only when preceded by s_2 or s_3; result 2
+ * entries are deleted unconditionally. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+/* Suffix table searched by r_reflexive(); both entries are simply deleted. */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+/* Suffix table searched by r_verb(). Result 1 entries are deleted only when
+ * preceded by s_4 or s_5; result 2 entries are deleted unconditionally. */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+/* Suffix table searched by r_noun(); every entry has result 1, which
+ * r_noun() answers by deleting the matched suffix. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+/* Suffix table searched by r_derivational(); a match is deleted only when
+ * r_R2() also succeeds. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+/* Suffix table searched by r_tidy_up(); the result values 1, 2 and 3 select
+ * the three cases of the switch in that function. */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+/* Bitmap handed to in_grouping()/out_grouping() by r_mark_regions() together
+ * with the code range 192..220 (presumably the vowels - TODO confirm). */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+/* One-character strings compared with eq_s_b(): s_0/s_1 in
+ * r_perfective_gerund, s_2/s_3 in r_adjectival, s_4/s_5 in r_verb,
+ * s_6..s_8 in r_tidy_up and s_9 in russian_stem. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+/*
+ * Scan forward from the cursor and record two marks:
+ *   z->I[0] - position just after the first character that is in g_v;
+ *   z->I[1] - position reached after then passing a character that is not
+ *             in g_v, another that is in g_v and another that is not.
+ * A mark stays at z->l when the scan reaches the limit before setting it.
+ * The cursor is restored before returning; the function always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 100 */ /* c remembers the entry cursor */
+        while(1) { /* gopast, line 101 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+            break;
+        lab1:
+            if (z->c >= z->l) goto lab0; /* limit reached: give up */
+            z->c++;
+        }
+        z->I[0] = z->c; /* setmark pV, line 101 */
+        while(1) { /* gopast, line 101 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+            break;
+        lab2:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+            break;
+        lab3:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+            break;
+        lab4:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 102 */
+    lab0:
+        z->c = c; /* reached on success and on failure alike */
+    }
+    return 1;
+}
+
+/* Region test: returns 1 when the cursor is at or beyond the mark stored in
+ * z->I[1], and 0 otherwise. */
+static int r_R2(struct SN_env * z) {
+    return (z->c >= z->I[1]) ? 1 : 0;
+}
+/*
+ * Try to strip a suffix listed in a_0 that ends at the cursor. Entries with
+ * result 1 are removed only when the character in front of the suffix is s_0
+ * or s_1; entries with result 2 are removed unconditionally.
+ * Returns 1 if a suffix was deleted, 0 otherwise.
+ */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 111 */
+    among_var = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 111 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 115 */
+                if (!(eq_s_b(z, 1, s_0))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m; /* rewind before trying the alternative */
+                if (!(eq_s_b(z, 1, s_1))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 115 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 122 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Look for a suffix from a_1 ending at the cursor. Returns 0 when none is
+ * found; otherwise marks it as the slice (ket/bra), deletes it when the table
+ * result is 1, and returns 1.
+ */
+static int r_adjective(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c;
+    found = find_among_b(z, a_1, 26);
+    if (found == 0) return 0;
+    z->bra = z->c;
+    if (found == 1) slice_del(z);
+    return 1;
+}
+
+/*
+ * Strip an adjective ending (r_adjective) and then, optionally, a further
+ * suffix from a_2 in front of it. Result 1 entries of a_2 are removed only
+ * when preceded by s_2 or s_3; result 2 entries are removed unconditionally.
+ * Returns 0 when r_adjective fails, otherwise 1.
+ *
+ * The optional step is a "try": when it does not apply, the cursor must go
+ * back to where the try started. The two saved positions therefore carry
+ * distinct names; with both called "m" the inner one shadowed the outer one
+ * and the failure path restored the wrong position.
+ */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m_keep = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m_keep; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m_keep; goto lab0; }
+            case 1:
+                {   int m1 = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m1; /* rewind before the alternative */
+                    /* neither alternative matched: abandon the whole try */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_keep; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/*
+ * Look for a suffix from a_3 ending at the cursor. Returns 0 when none is
+ * found; otherwise marks it as the slice (ket/bra), deletes it when the table
+ * result is 1, and returns 1.
+ */
+static int r_reflexive(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c;
+    found = find_among_b(z, a_3, 2);
+    if (found == 0) return 0;
+    z->bra = z->c;
+    if (found == 1) slice_del(z);
+    return 1;
+}
+/*
+ * Try to strip a suffix listed in a_4 that ends at the cursor. Entries with
+ * result 1 are removed only when the character in front of the suffix is s_4
+ * or s_5; entries with result 2 are removed unconditionally.
+ * Returns 1 if a suffix was deleted, 0 otherwise.
+ */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 176 */
+    among_var = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 176 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 182 */
+                if (!(eq_s_b(z, 1, s_4))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m; /* rewind before trying the alternative */
+                if (!(eq_s_b(z, 1, s_5))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 182 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 190 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Look for a suffix from a_5 ending at the cursor. Returns 0 when none is
+ * found; otherwise marks it as the slice (ket/bra), deletes it when the table
+ * result is 1, and returns 1.
+ */
+static int r_noun(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c;
+    found = find_among_b(z, a_5, 36);
+    if (found == 0) return 0;
+    z->bra = z->c;
+    if (found == 1) slice_del(z);
+    return 1;
+}
+
+/*
+ * Look for a suffix from a_6 ending at the cursor. Returns 0 when none is
+ * found or when the r_R2 region test fails after the match; otherwise deletes
+ * the suffix when the table result is 1 and returns 1.
+ */
+static int r_derivational(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c;
+    found = find_among_b(z, a_6, 2);
+    if (found == 0) return 0;
+    z->bra = z->c;
+    if (!r_R2(z)) return 0;
+    if (found == 1) slice_del(z);
+    return 1;
+}
+/*
+ * Final clean-up driven by table a_7 (suffix ending at the cursor):
+ *   result 1 - delete the suffix; then, if the text now ends in two s_6/s_7
+ *              characters, delete the last of them;
+ *   result 2 - delete the matched character only if s_8 precedes it;
+ *   result 3 - delete the matched character.
+ * Returns 0 when nothing in a_7 matches or a required character is missing
+ * (in case 1 the first deletion has already been made by then), else 1.
+ */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 223 */
+    among_var = find_among_b(z, a_7, 4); /* substring, line 223 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 223 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 227 */
+            z->ket = z->c; /* [, line 228 */
+            if (!(eq_s_b(z, 1, s_6))) return 0;
+            z->bra = z->c; /* ], line 228 */
+            if (!(eq_s_b(z, 1, s_7))) return 0;
+            slice_del(z); /* delete, line 228 */
+            break;
+        case 2:
+            if (!(eq_s_b(z, 1, s_8))) return 0;
+            slice_del(z); /* delete, line 231 */
+            break;
+        case 3:
+            slice_del(z); /* delete, line 233 */
+            break;
+    }
+    return 1;
+}
+/*
+ * Stem the word held in z in place.
+ *
+ * After r_mark_regions() the word is processed backwards from z->l with the
+ * backward limit z->lb temporarily raised to the z->I[0] mark:
+ *   1. r_perfective_gerund, or else an optional r_reflexive followed by the
+ *      first of r_adjectival / r_verb / r_noun that succeeds;
+ *   2. removal of a trailing s_9 character, if present;
+ *   3. r_derivational;
+ *   4. r_tidy_up.
+ * The cursor is rewound to the end of the word between the steps.
+ * Returns 1, or 0 if the cursor lies before z->I[0] when the limit is set.
+ */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3; /* holds the previous z->lb while the limit is in force */
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            z->c = z->l - m; /* uses the m of the "do" block (line 242) */
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3; /* put the saved backward limit back */
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create the working environment for this stemmer via SN_create_env(0, 2, 0);
+ * presumably the 2 reserves the integer slots z->I[0] and z->I[1] that
+ * r_mark_regions() fills in - TODO confirm against SN_create_env. */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Dispose of an environment obtained from russian_create_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+/* Create and dispose of the working environment used by russian_stem(). */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+/* Stem the word held in z in place. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+/* unless(C) reads as "if not C" and guards the statement that follows it. */
+#define unless(C) if(!(C))
+/* Capacity, in symbols, that create_s() gives a freshly created buffer. */
+#define CREATE_SIZE 1
+
+/*
+ * Allocate a new symbol buffer. The returned pointer addresses the symbols;
+ * a header of HEAD bytes sits in front of it, and both the capacity and the
+ * size recorded there are set to CREATE_SIZE. Release with lose_s().
+ *
+ * Callers use the result without checking it, so an allocation failure is
+ * reported on stderr and the process exits, the same way slice_check()
+ * treats a fatal error. Previously HEAD was added to the NULL returned by
+ * malloc and the header was written through the resulting pointer.
+ */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == 0)
+    {
+        fprintf(stderr, "create_s: out of memory\n");
+        exit(1);
+    }
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/*
+ * Release a buffer obtained from create_s() or increase_size(). The
+ * allocation starts HEAD bytes before p. A null p is ignored, like
+ * free(NULL), instead of freeing an address computed from a null pointer.
+ */
+extern void lose_s(symbol * p)
+{   if (p == 0) return;
+    free((char *) p - HEAD);
+}
+
+/*
+ * Test the character at the cursor against the bitmap s, whose bit 0 stands
+ * for character code min. Succeeds (returns 1 and advances the cursor) when
+ * the code lies in min..max and its bit is set; otherwise returns 0 and
+ * leaves the cursor alone. Also fails at the forward limit z->l.
+ */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward form of in_grouping: tests the character just before the cursor
+ * and, on success, moves the cursor back by one. Fails at the backward
+ * limit z->lb.
+ */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/*
+ * Complement of in_grouping: succeeds (returns 1 and advances the cursor)
+ * when the character at the cursor is outside min..max or its bit in s is
+ * clear; returns 0 when the character belongs to the grouping or the cursor
+ * is at the forward limit z->l.
+ */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max)
+    {
+        ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward form of out_grouping: tests the character just before the cursor
+ * and, on success, moves the cursor back by one. Fails at the backward
+ * limit z->lb.
+ */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max)
+    {
+        ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/*
+ * Succeeds (returns 1 and advances the cursor) when the character at the
+ * cursor has a code in min..max; returns 0 otherwise or at the limit z->l.
+ */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch < min || ch > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward form of in_range: tests the character just before the cursor and
+ * steps the cursor back on success. Fails at the backward limit z->lb.
+ */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch < min || ch > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/*
+ * Succeeds (returns 1 and advances the cursor) when the character at the
+ * cursor has a code outside min..max; returns 0 otherwise or at the limit
+ * z->l.
+ */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch >= min && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward form of out_range: tests the character just before the cursor and
+ * steps the cursor back on success. Fails at the backward limit z->lb.
+ */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch >= min && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/*
+ * Compare the s_size symbols at s with the text starting at the cursor.
+ * On a match the cursor moves past them and 1 is returned; otherwise, or
+ * when fewer than s_size symbols remain before z->l, 0 is returned.
+ */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->l - z->c < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/*
+ * Backward form of eq_s: compare the s_size symbols at s with the text that
+ * ends at the cursor. On a match the cursor moves back over them and 1 is
+ * returned; otherwise, or when fewer than s_size symbols lie between z->lb
+ * and the cursor, 0 is returned.
+ */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->c - z->lb < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* eq_s() for a symbol buffer p, using the length recorded in p (SIZE(p)). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s(z, len, p);
+}
+
+/* eq_s_b() for a symbol buffer p, using the length recorded in p (SIZE(p)). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s_b(z, len, p);
+}
+/*
+ * Search the table v (v_size entries) for an entry whose string matches the
+ * text that starts at the cursor.
+ *
+ * The first loop is a binary search over v that compares entry strings with
+ * the text (so v is presumably sorted by string - TODO confirm); common_i
+ * and common_j remember how many leading symbols are already known to match
+ * at the two ends of the search range. The second loop starts at the entry
+ * the search settled on and follows substring_i links until it reaches an
+ * entry that is matched in full and whose function, if any, returns non-zero.
+ *
+ * Returns the result field of the accepted entry, with the cursor placed
+ * just past the matched string, or 0 when the chain of candidates runs out.
+ */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; } /* text ran out first */
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size) /* the whole entry string matched */
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c + w->s_size; /* undo any cursor movement by the function */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i; /* fall back to the linked entry */
+        if (i < 0) return 0;
+    }
+}
+
+/*
+ * find_among_b is for backwards processing. Same comments apply.
+ *
+ * Entry strings are compared from their last symbol towards their first
+ * against the text that ends at the cursor, never going below z->lb. On
+ * success the cursor is placed at the start of the matched string and the
+ * entry's result is returned; 0 is returned when no candidate is accepted.
+ */
+
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1; /* last symbol before the cursor */
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; } /* text ran out first */
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size) /* the whole entry string matched */
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c - w->s_size; /* undo any cursor movement by the function */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i; /* fall back to the linked entry */
+        if (i < 0) return 0;
+    }
+}
+
+
+/*
+ * Move the contents of buffer p into a new buffer with room for n + 20
+ * symbols, release p and return the new buffer.
+ *
+ * The recorded size is carried over as well, so the new buffer's header is
+ * fully initialised even before the caller sets the size again. Callers use
+ * the result without checking it, so an allocation failure is reported on
+ * stderr and the process exits, the same way slice_check() treats a fatal
+ * error. Previously HEAD was added to the NULL returned by malloc and the
+ * resulting pointer was written through.
+ */
+extern symbol * increase_size(symbol * p, int n)
+{   symbol * q;
+    int new_size = n + 20;
+    void * mem = malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    if (mem == 0)
+    {
+        fprintf(stderr, "increase_size: out of memory\n");
+        exit(1);
+    }
+    q = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(q) = new_size;
+    SET_SIZE(q, SIZE(p));
+    memmove(q, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return q;
+}
+
+/*
+ * Replace the symbols between c_bra and c_ket in z->p by the s_size symbols
+ * at s. When the length changes, the buffer is grown if necessary, the tail
+ * is shifted, the recorded size and z->l are adjusted, and the cursor is
+ * moved along with the tail (or pulled back to c_bra if it was inside the
+ * replaced region). Returns the change in length.
+ */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{
+    int old_len = SIZE(z->p);
+    int delta = s_size - (c_ket - c_bra);
+
+    if (delta != 0)
+    {
+        int new_len = delta + old_len;
+        if (new_len > CAPACITY(z->p)) z->p = increase_size(z->p, new_len);
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        if (z->c >= c_ket)
+            z->c += delta;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/*
+ * Verify 0 <= bra <= ket <= l <= SIZE(z->p) before a slice operation. On a
+ * violation, print a message to stderr, dump the buffer with debug() and
+ * terminate the process with status 1.
+ */
+static void slice_check(struct SN_env * z)
+{
+    if (z->bra < 0 ||
+        z->bra > z->ket ||
+        z->ket > z->l ||
+        z->l > SIZE(z->p))   /* this last test could be removed */
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+/* Replace the current slice (z->bra .. z->ket) by the s_size symbols at s,
+ * after slice_check() has validated the slice bounds. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the contents of the symbol buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    slice_from_s(z, len, p);
+}
+
+/* Delete the current slice by replacing it with an empty string. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, (symbol *) 0);
+}
+
+/*
+ * Replace the region bra..ket by the s_size symbols at s and shift the
+ * slice marks z->bra and z->ket by the change in length when they lie at or
+ * after bra.
+ */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* insert_s() for a symbol buffer p, using the length recorded in p. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/*
+ * Copy the current slice (z->bra .. z->ket) into the buffer p, growing p
+ * first when its capacity is too small, and record the new length in it.
+ * Returns the buffer, which may differ from the p passed in.
+ */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int len;
+
+    slice_check(z);
+    len = z->ket - z->bra;
+    if (len > CAPACITY(p)) p = increase_size(p, len);
+    memmove(p, z->p + z->bra, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/*
+ * Copy the first z->l symbols of z->p into the buffer p, growing p first
+ * when its capacity is too small, and record the new length in it.
+ * Returns the buffer, which may differ from the p passed in.
+ */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int count = z->l;
+
+    if (count > CAPACITY(p)) p = increase_size(p, count);
+    memmove(p, z->p, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/*
+ * Print the buffer z->p to stdout with position markers: '{' at z->lb,
+ * '[' at z->bra, '|' at the cursor, ']' at z->ket and '}' at z->l. Zero
+ * symbols are shown as '#'. When number is non-negative the line starts
+ * with number, line_count and the buffer size.
+ */
+extern void debug(struct SN_env * z, int number, int line_count)
+{
+    int pos;
+    int size = SIZE(z->p);
+
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,size);
+    /* pos runs up to and including size so markers at the very end print */
+    for (pos = 0; pos <= size; pos++)
+    {
+        if (pos == z->lb) printf("{");
+        if (pos == z->bra) printf("[");
+        if (pos == z->c) printf("|");
+        if (pos == z->ket) printf("]");
+        if (pos == z->l) printf("}");
+        if (pos < size)
+        {
+            int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Convert a NUL-terminated string to lower case in place, one byte at a time
+ * with tolower(), and return the same pointer.
+ */
+char*
+lowerstr(char *str) {
+   char *cur;
+
+   for(cur=str; *cur; cur++)
+       *cur = tolower(*(unsigned char*)cur);
+   return str;
+}
+
+/*
+ * Release every word of a stop list and the pointer array itself, then zero
+ * the whole StopList structure (this also clears s->wordop).
+ *
+ * The array is not NULL-terminated, so iteration is bounded by s->len only,
+ * and the array is freed exactly once, after all of its entries.
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       while( s->len >0 ) {
+           free(*ptr);     /* free(NULL) is a no-op */
+           ptr++; s->len--;
+       }
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Read a stop-word file, one word per line, into s.
+ *
+ * in  - text value holding the file name; NULL or empty yields an empty list
+ * s   - list to fill; s->wordop, when set, is applied to every word read
+ *
+ * Words and the pointer array are allocated with strdup()/realloc() and are
+ * released by freestoplist().  Empty lines are skipped.  Failure to open the
+ * file or to allocate memory is reported with elog(ERROR).
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t buflen=strlen(buf);
+
+           /*
+            * Strip the newline only when it is really there: the last line
+            * of a file may lack one, and its final letter must be kept.
+            */
+           if ( buflen>0 && buf[buflen-1]=='\n' )
+               buf[buflen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /* hand the words read so far to s so they get freed */
+                   s->stop=stop;
+                   freestoplist(s);
+                   fclose(hin);
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               /* hand the words read so far to s so they get freed */
+               s->stop=stop;
+               freestoplist(s);
+               fclose(hin);
+               elog(ERROR,"Not enough memory");
+           }
+           if ( s->wordop )
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++;
+       }
+       fclose(hin);
+       pfree(filename);
+   }
+   s->stop=stop;
+}
+
+/* qsort()/bsearch() comparator: a and b point to char* elements; order by strcmp(). */
+static int
+comparestr(const void *a, const void *b) {
+   char *left = *(char**)a;
+   char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/* Sort the words of a non-empty stop list with comparestr(), as searchstoplist()'s bsearch() requires. */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is present in the stop list.  s->wordop, when set, is
+ * applied to key first.  The lookup is a bsearch(), so the list must already
+ * be sorted with the same comparator (see sortstoplist()).
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop )
+       key=(*(s->wordop))(key);
+
+   if ( !s->stop || s->len<=0 )
+       return false;
+
+   return ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) != NULL ) ? true : false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <locale.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the configuration whose pg_ts_cfg row has OID id.
+ *
+ * Two saved SPI plans are used: the first fetches the configuration's parser
+ * name, the second fetches (token id, dictionary-name array) rows for it.
+ * Names are copied into TopMemoryContext so they survive SPI_finish(); they
+ * are turned into ids with name2id_prs()/name2id_dict() only after the SPI
+ * connection is closed.  cfg->map and its dict_id arrays are malloc()ed and
+ * are released by reset_cfg().  Failures in this function are reported
+ * through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer(
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
+       );
+       /* keep a private copy: the tuple goes away with the SPI connection */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+
+       cfg->id=id;
+   } else
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second plan takes (parser name, cfg oid) */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /*
+        * Rows arrive ordered by token id descending, so the first row
+        * carries the largest id and fixes the size of the map.
+        */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull now describes the dictionary array (column 2) */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) );
+
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* store copies of the dictionary names for now; resolved below */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       }
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a )
+           pfree(a);
+   }
+
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every stored dictionary name by the dictionary's id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Process-local cache of loaded configurations.
+ *   last_cfg    - entry returned by the most recent lookup, checked first
+ *   len         - number of used entries in list
+ *   reallen     - number of entries list has room for
+ *   list        - entries kept sorted by id (see comparecfg/findcfg)
+ *   name2id_map - cache of configuration name -> id (see name2id_cfg)
+ */
+typedef struct {
+   TSCfgInfo   *last_cfg;
+   int     len;
+   int     reallen;
+   TSCfgInfo   *list;
+   SNMap       name2id_map;
+} CFGList;
+
+/* The single cache instance, initially empty. */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name->id map, every malloc()ed
+ * dictionary array and map of each cached configuration, and the list
+ * itself.  CList is left zeroed, i.e. empty.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly instead of subtracted: Oid is unsigned, so
+ * a difference converted to int can come out with the wrong sign.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached configuration with the given id, loading it with
+ * init_cfg() on first use.
+ *
+ * Lookup order: the most recently returned entry, then a binary search of
+ * the sorted cache, then a fresh load.  A new configuration is built in a
+ * local variable and copied into the cache only after init_cfg() has
+ * returned, so an ERROR raised while loading cannot leave a partly filled
+ * entry reachable through CList.last_cfg or CList.list.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo newcfg;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance: load it; nothing in the cache is touched if this fails */
+   init_cfg(id, &newcfg);
+
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp )
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+   CList.list[CList.len]=newcfg;
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+
+   /* qsort changed the order, so locate the entry again */
+   CList.last_cfg=NULL;
+   return findcfg(id);
+}
+
+
+/*
+ * Translate a configuration name into the OID of its pg_ts_cfg row.
+ *
+ * The name->id cache in CList is consulted first; on a miss the row is
+ * fetched through a saved SPI plan and the result is added to the cache.
+ * Raises ERROR when the plan cannot be prepared or run, when no row
+ * matches, or when the fetched oid is null.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid cfgid;
+
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid )
+       return cfgid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !(SPI_processed > 0) )
+       elog(ERROR, "No tsearch config");
+
+   cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull )
+       elog(ERROR, "Null id for tsearch config");
+
+   SPI_finish();
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Parse buf (buflen bytes) with the parser of cfg and append the normalized
+ * lexemes to prs->words.
+ *
+ * For every token the dictionaries mapped to its type are tried in order;
+ * the first one that returns a non-NULL result ends the search, even when
+ * that result is an empty list.  prs->pos is advanced once per token that
+ * some dictionary recognized.  The lexeme strings returned by the dictionary
+ * are stored in prs->words without copying; only the array holding them is
+ * freed here.  Raises ERROR for a token of MAXSTRLEN bytes or more.
+ */
+void
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 at end of input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           /* lenlemm is an integer, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token to the headline word array, doubling the array as needed.
+ * The new entry is zeroed, then given the token type, its length and a
+ * palloc()ed copy of the buflen bytes at buf (no terminator is added).
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD *slot;
+
+   while (prs->lenwords <= prs->curwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+
+   prs->curwords++;
+}
+
+/*
+ * Link the most recently added headline word to every query operand equal
+ * to the normalized lexeme buf (buflen bytes).
+ *
+ * The first matching operand is stored in the word itself.  Each further
+ * match appends a copy of the word, marked repeated, that points to the
+ * additional operand.  Room for query->size extra entries is reserved first.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc() above may have moved the array,
+    * which would leave an earlier pointer dangling.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) {
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else
+               word->item=item;
+       }
+       item++;
+   }
+}
+
+/*
+ * Parse buf (buflen bytes) for headline generation.
+ *
+ * Every token produced by the parser of cfg is appended to prs->words with
+ * hladdword().  Tokens whose type has mapped dictionaries are normalized by
+ * the first dictionary that returns a non-NULL result, and each resulting
+ * lexeme is matched against the query operands with hlfinditem().  The
+ * lexemes and the array holding them are freed here.  Raises ERROR for a
+ * token of MAXSTRLEN bytes or more.
+ */
+void
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 at end of input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len )
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           /* lenlemm is an integer, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from the words collected in prs.
+ *
+ * Only words flagged `in` and neither `skip` nor `repeated` are emitted: a
+ * `replace` word becomes a single space, any other word is copied and, when
+ * `selected`, wrapped in prs->startsel / prs->stopsel.  The word buffer of
+ * every non-repeated entry is freed along the way.  Returns a palloc()ed
+ * text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int cap=128;            /* allocated bytes */
+   int used=VARHDRSZ;      /* bytes written so far, header included */
+   text *out=(text*)palloc( cap );
+   int i;
+
+   for(i=0; i<prs->curwords; i++) {
+       HLWORD *w=&( prs->words[i] );
+
+       /* make room for the word plus both markers before writing */
+       while ( w->len + prs->stopsellen + prs->startsellen + used >= cap ) {
+           cap *= 2;
+           out = (text *) repalloc(out, cap);
+       }
+
+       if ( w->in && !w->skip && !w->repeated ) {
+           char *dst=((char*)out) + used;
+
+           if ( w->replace ) {
+               *dst=' ';
+               dst++;
+           } else {
+               if (w->selected) {
+                   memcpy(dst,prs->startsel,prs->startsellen);
+                   dst+=prs->startsellen;
+               }
+               memcpy(dst,w->word,w->len);
+               dst+=w->len;
+               if (w->selected) {
+                   memcpy(dst,prs->stopsel,prs->stopsellen);
+                   dst+=prs->stopsellen;
+               }
+           }
+           used = dst - ((char*)out);
+       }
+
+       /* repeated entries share the word buffer of the entry they copy */
+       if ( !w->repeated )
+           pfree(w->word);
+   }
+
+   VARATT_SIZEP(out)=used;
+   return out;
+}
+
+/*
+ * Return the id of the current configuration.
+ *
+ * A value remembered in current_cfg_id is returned directly.  Otherwise the
+ * pg_ts_cfg row whose locale equals the current LC_CTYPE locale is looked up
+ * through a saved SPI plan and its oid is remembered.  Raises ERROR when the
+ * plan cannot be prepared or run, or when no row matches.
+ */
+int  
+get_currcfg(void) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1];
+   const char *locname;
+   bool isnull;
+   int rc;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( plan_getcfg_bylocale == NULL ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( plan_getcfg_bylocale == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* setlocale() with a NULL locale only queries the current setting */
+   locname = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)locname) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !(SPI_processed > 0) )
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current one.
+ * findcfg() is called first, so the configuration is looked up (and loaded
+ * if necessary) before current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS)
+{
+   Oid cfgid = PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id = cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current one.
+ * The name is resolved with name2id_cfg() and the work is delegated to
+ * set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS)
+{
+   text *cfgname = PG_GETARG_TEXT_P(0);
+   Oid cfgid = name2id_cfg(cfgname);
+
+   DirectFunctionCall1(set_curcfg, ObjectIdGetDatum(cfgid));
+   PG_FREE_IF_COPY(cfgname, 0);
+   PG_RETURN_VOID();
+}
+
+/* SQL-callable: return the id of the current configuration (see get_currcfg()). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS)
+{
+   int cfgid = get_currcfg();
+
+   PG_RETURN_OID( cfgid );
+}
+
+/*
+ * SQL-callable: report that the tsearch caches were cleaned.
+ * NOTE(review): nothing is reset in this function itself; presumably
+ * ts_error() performs the cache reset before emitting the message - confirm
+ * against its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS)
+{
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID();
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries tried, in order, for one token type. */
+typedef struct {
+   int     len;        /* number of entries in dict_id */
+   Datum   *dict_id;   /* dictionary ids, stored as Datums */
+} ListDictionary;
+
+/* One loaded configuration, as filled in by init_cfg(). */
+typedef struct {
+   Oid     id;         /* id of the configuration */
+   Oid     prs_id;     /* id of its parser */
+   int     len;        /* number of entries in map */
+   ListDictionary *map;    /* indexed by token type */
+} TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalized lexeme produced by parsetext_v2(). */
+typedef struct {
+   uint16  len;        /* length of word in bytes */
+   union {
+       uint16  pos;    /* single position, set by parsetext_v2() */
+       uint16  *apos;  /* presumably an array of positions - TODO confirm with users of WORD */
+   } pos;
+   char    *word;      /* lexeme text */
+   uint32  alen;       /* set to 0 by parsetext_v2(); presumably the allocated size of apos - TODO confirm */
+} WORD;
+
+/* Growable array of lexemes plus the running word position. */
+typedef struct {
+   WORD    *words;
+   int4    lenwords;   /* allocated entries in words */
+   int4    curwords;   /* used entries in words */
+   int4    pos;        /* position counter, advanced per recognized token */
+} PRSTEXT;
+
+/* One token of a document being turned into a headline. */
+typedef struct {
+   uint16  len;        /* length of word in bytes */
+   uint8   selected:1, /* genhl(): wrap in start/stop markers */
+           in:1,       /* genhl(): word is part of the output */
+           skip:1,     /* genhl(): do not emit */
+           replace:1,  /* genhl(): emit a single space instead */
+           repeated:1; /* extra copy made by hlfinditem() for another query operand */
+   uint8   type;       /* token type reported by the parser */
+   char    *word;      /* token text, not NUL-terminated */
+   ITEM    *item;      /* matching query operand, if any */
+} HLWORD;
+
+/* Growable array of headline tokens plus the selection markers. */
+typedef struct {
+   HLWORD  *words;
+   int4    lenwords;   /* allocated entries in words */
+   int4    curwords;   /* used entries in words */
+   char    *startsel;  /* text put before a selected word */
+   char    *stopsel;   /* text put after a selected word */
+   int2    startsellen;
+   int2    stopsellen;
+} HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+
+/*
+ * Input function for tsstat.  The argument is never examined: an empty
+ * accumulator (header only, zero entries) is always returned.
+ */
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+
+/*
+ * Output function for tsstat: not implemented, always raises an ERROR.
+ * The return statement after elog() only keeps the function well-formed.
+ */
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * Create or enlarge the array of WordEntry pointers.
+ *
+ * in   current array, or NULL if none was allocated yet
+ * len  in/out: capacity of the array, counted in pointers
+ *
+ * The first allocation has room for 8 pointers; every later call doubles
+ * the capacity.  Returns the (possibly moved) array.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       /* existing array: double it */
+       *len = (*len) * 2;
+       return repalloc( in, sizeof(WordEntry*) * (*len) );
+   }
+
+   /* nothing usable yet: start from scratch */
+   *len = 8;
+   return palloc( sizeof(WordEntry*) * (*len) );
+}
+
+/*
+ * Compare a statistics entry with a tsvector word.
+ *
+ * Shorter strings sort first; strings of equal length are ordered by
+ * strncmp() over that length.  'stat' and 'txt' are the containers whose
+ * string areas the two entries point into.
+ * Returns <0, 0 or >0 in the usual comparator fashion.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   char    *statword, *txtword;
+
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   statword = STATSTRPTR(stat) + a->pos;
+   txtword = STRPTR(txt) + b->pos;
+   return strncmp(statword, txtword, a->len);
+}
+
+/*
+ * formstat - build a new tsstat holding every entry of 'stat' plus 'len'
+ * additional words taken from 'txt'.
+ *
+ * stat   existing statistics; only read here, never freed
+ * txt    tsvector owning the lexemes that 'entry' points to
+ * entry  array of 'len' pointers to WordEntry items of 'txt'
+ * len    number of pointers in 'entry'
+ *
+ * Returns a freshly palloc'd tsstat.  The strings of 'stat' are copied
+ * first, so the 'pos' offsets of the old entries stay valid; the strings
+ * of the new words are appended behind them.  Each new entry starts with
+ * ndoc=1 and nentry = number of positions of the word (1 if it has none).
+ *
+ * The merge assumes that 'entry' is ordered like the StatEntry array (see
+ * compareStatWord) and that none of its words is already present in 'stat';
+ * ts_accum only passes words it did not find.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* total length of the strings that will be added */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings keep their offsets; new ones go right after them */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary search for its insertion point */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       /* entries before the insertion point, the new entry, then the rest */
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: linear merge of the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* remaining old entries, if any */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* remaining new words, if any (the copy above was empty in that case) */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+
+/*
+ * ts_accum(tsstat, tsvector) - add the words of one tsvector to the
+ * statistics accumulator.
+ *
+ * Argument 0 is the accumulator; if it is NULL an empty one is created.
+ * Argument 1 is the tsvector; a NULL or empty vector leaves the accumulator
+ * unchanged.
+ *
+ * Words already known have their counters updated in place (ndoc by one,
+ * nentry by the number of positions, or by one if the word carries none).
+ * Words not seen before are collected and merged in by formstat().
+ *
+ * Returns the accumulator passed in when no new word was found, otherwise a
+ * newly allocated one; the old accumulator is not freed here.
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt;
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /*
+    * A NULL tsvector has to be rejected before detoasting, because
+    * PG_DETOAST_DATUM dereferences its argument.
+    */
+   if ( PG_ARGISNULL(1) || DatumGetPointer(PG_GETARG_DATUM(1))==NULL )
+       PG_RETURN_POINTER(stat);
+
+   txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+
+   /* simple check of correctness */
+   if ( txt==NULL || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   if ( stat->size < 100*txt->size ) { /* merge */
+       /* both arrays are walked in parallel, in compareStatWord order */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* whatever is left in the tsvector is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       /* accumulator is much larger than the vector: binary search per word */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions in this file: the
+ * statistics to report and the index of the next entry to emit.
+ */
+typedef struct {
+   uint32  cur;        /* index of the next StatEntry to return */
+   /*
+    * NOTE(review): declared as tsvector*, but ts_setup_firstcall stores a
+    * byte copy of a tsstat here and it is only read through ->size and the
+    * STAT* macros.
+    */
+   tsvector *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions of this file.
+ *
+ * Copies 'stat' into the multi-call memory context, stores it together
+ * with a zeroed cursor in funcctx->user_fctx, and prepares the slot and
+ * attribute metadata for rows of the "statinfo" type.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext   saved;
+   StatStorage     *storage;
+   TupleDesc       rowdesc;
+   
+   /* everything below has to live across calls */
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   storage = palloc( sizeof(StatStorage) );
+   storage->cur = 0;
+   storage->stat = palloc( stat->len );
+   memcpy(storage->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)storage;
+
+   rowdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+
+/*
+ * Produce the next result row from the state stored in funcctx->user_fctx.
+ *
+ * Returns a tuple Datum with the columns (word, ndoc, nentry) for the
+ * current entry and advances the cursor.  Once all entries were returned
+ * the stored state is freed and (Datum)0 is returned.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage     *storage = (StatStorage*)funcctx->user_fctx;
+   StatEntry   *item;
+   HeapTuple   row;
+   Datum   out;
+   char    *cols[3];
+   char    ndocbuf[16];
+   char    nentrybuf[16];
+
+   /* nothing left: release the state and signal the end */
+   if ( !( storage->cur < storage->stat->size ) ) {
+       pfree(storage->stat);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   item = STATPTR(storage->stat) + storage->cur;
+
+   /* counters are passed as text */
+   sprintf(ndocbuf,"%d",item->ndoc);
+   sprintf(nentrybuf,"%d",item->nentry);
+   cols[1] = ndocbuf;
+   cols[2] = nentrybuf;
+
+   /* the stored word is not NUL-terminated, so make a terminated copy */
+   cols[0] = palloc( item->len+1 );
+   memcpy( cols[0], STATSTRPTR(storage->stat)+item->pos, item->len);
+   cols[0][item->len] = '\0';
+
+   row = BuildTupleFromCStrings(funcctx->attinmeta, cols);
+   out = TupleGetDatum(funcctx->slot, row);
+
+   pfree(cols[0]);
+   storage->cur++;
+   return out;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+
+/*
+ * ts_accum_finish(tsstat) - set-returning function that emits one row per
+ * entry of the accumulator passed as argument 0.
+ */
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       ctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(ctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(ctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(ctx);
+   }
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+/* oid of the tsvector type, looked up on first use by get_ti_Oid() */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the oid of the 'tsvector' type through SPI and store it in tiOid.
+ *
+ * Raises an ERROR if the query fails, returns no row, or yields InvalidOid.
+ * It calls SPI_exec directly, so it relies on the caller's SPI connection.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * An empty result must be caught here: a "< 0" test never fires for a
+    * row count and vals[0] below would be read from an empty table.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * ts_stat_sql - run the query given in 'txt' and accumulate statistics over
+ * the tsvector values it returns.
+ *
+ * The query must produce exactly one column and that column must be of
+ * type tsvector, otherwise an ERROR is raised.  Rows are read through an
+ * SPI cursor in batches of 100; NULL values are skipped and every other
+ * value is fed to ts_accum().
+ *
+ * It uses SPI calls without connecting itself, so the caller must already
+ * be connected to SPI.  Returns the accumulated tsstat, which is empty if
+ * the query returned no rows.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   /* the first batch also provides the tuple descriptor checked below */
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum never frees its input, so drop the old copy here */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+
+/*
+ * ts_stat(text) - set-returning function: runs the query given as text on
+ * the first call, gathers word statistics from its tsvector column, and
+ * then returns one row per word.
+ */
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *querytext=PG_GETARG_TEXT_P(0);
+       tsstat *collected;
+   
+       ctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       collected = ts_stat_sql(querytext);
+       PG_FREE_IF_COPY(querytext,0); 
+       /* copy the result into the multi-call context before SPI is closed */
+       ts_setup_firstcall(ctx, collected );
+       SPI_finish();
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(ctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(ctx);
+   }
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one word stored in a tsstat.
+ */
+typedef struct {
+   uint32  len;        /* length of the word in bytes (no terminator stored) */
+   uint32  pos;        /* offset of the word from the start of the string area (STATSTRPTR) */
+   uint32  ndoc;       /* number of tsvectors in which the word was seen */
+   uint32  nentry;     /* sum of the word's position counts; a tsvector without positions counts as 1 */
+}  StatEntry;
+
+/*
+ * Statistics accumulator.  After the two header fields, data holds the
+ * array of StatEntry items followed by the words themselves; use the
+ * STATPTR / STATSTRPTR macros to reach them.
+ */
+typedef struct {
+   int4        len;        /* total size of the object in bytes, header included */
+   int4        size;       /* number of StatEntry items */
+   char        data[1];    /* start of the variable-length part */
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat.  Every macro argument is parenthesised so that
+ * expressions can be passed safely, and the object is accessed through
+ * tsstat itself rather than through the unrelated tsvector type.
+ */
+/* size of the fixed header (len + size) */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* total size of a tsstat with x entries and lenstr bytes of strings */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* first StatEntry of the object */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* start of the string area, right behind the StatEntry array */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* number of bytes in the string area */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf: one row per dictionary
+CREATE TABLE pg_ts_dict (
+   -- name used to refer to the dictionary, e.g. 'simple' or 'en_stem'
+   dict_name   text not null primary key,
+   -- pg_proc oid of the dictionary's init function (may be null)
+   dict_init   oid,
+   -- option text for the dictionary, e.g. the path of a stop-word file;
+   -- presumably handed to the init function -- confirm in the C code
+   dict_initoption text,
+   -- pg_proc oid of the dictionary's lexize function
+   dict_lexize oid not null,
+   -- free-form description
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf: one row per parser; the oid columns hold pg_proc oids
+CREATE TABLE pg_ts_parser (
+   -- name used to refer to the parser, e.g. 'default'
+   prs_name    text not null primary key,
+   -- function that starts parsing (prsd_start for the default parser)
+   prs_start   oid not null,
+   -- function that returns the next token (prsd_getlexeme)
+   prs_nexttoken   oid not null,
+   -- function that finishes parsing (prsd_end)
+   prs_end     oid not null,
+   -- function used for headline generation (prsd_headline)
+   prs_headline    oid not null,
+   -- function that describes the token types (prsd_lextype)
+   prs_lextype oid not null,
+   -- free-form description
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config: one row per configuration
+
+CREATE TABLE pg_ts_cfg (
+   -- name of the configuration, e.g. 'default' or 'default_russian'
+   ts_name     text not null primary key,
+   -- name of the parser to use; the rows inserted below all use 'default'
+   prs_name    text not null,
+   -- locale name such as 'C' or 'ru_RU.KOI8-R' (may be null);
+   -- presumably used to pick the configuration for the server locale -- confirm
+   locale      text
+) with oids;
+
+-- maps (configuration, token type) to the dictionaries used for that token
+CREATE TABLE pg_ts_cfgmap (
+   -- configuration name, as in pg_ts_cfg.ts_name
+   ts_name     text not null,
+   -- token type alias, e.g. 'lword', 'email', 'url'
+   tok_alias   text not null,
+   -- array of dictionary names, e.g. {en_stem}
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_id=4;
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_id=5;
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of lexeme positions in the string and their lengths
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator for WordEntryPos: orders by ascending position.
+ * Equal positions compare as 0 (the weight is not looked at); a comparator
+ * that never reports equality hands qsort() an inconsistent ordering.
+ */
+static int 
+comparePos(const void *a, const void *b) {
+   int pa = ((const WordEntryPos *) a)->pos;
+   int pb = ((const WordEntryPos *) b)->pos;
+
+   if ( pa == pb )
+       return 0;
+   return ( pa > pb ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and compact them in place so that every
+ * position value appears once; when the same position occurs several times
+ * the largest weight seen is kept.  Returns the number of entries left at
+ * the front of a[].
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *ptr, *res;
+
+   res=a;
+   /* a single position is already unique */
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   /* res = last kept entry, ptr = next candidate */
+   ptr = a + 1;
+   while (ptr - a < l) {
+       if ( ptr->pos != res->pos ) {
+           res++;
+           res->pos = ptr->pos;
+           res->weight = ptr->weight;
+           /* stop once the kept count reaches MAXNUMPOS or the position hit its cap */
+           if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
+               break;
+       } else if ( ptr->weight > res->weight )
+           res->weight = ptr->weight;
+       ptr++;
+   }
+   return res + 1 - a;
+}
+
+/* Base of the lexeme text that compareentry() indexes with entry.pos. */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN: shorter lexemes sort first, lexemes of
+ * equal length are ordered by strncmp() over their bytes in BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *lhs = (const WordEntryIN *) a;
+   const WordEntryIN *rhs = (const WordEntryIN *) b;
+
+   if ( lhs->entry.len != rhs->entry.len )
+       return ( lhs->entry.len > rhs->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[lhs->entry.pos],
+                  &BufferStr[rhs->entry.pos],
+                  lhs->entry.len);
+}
+
+/*
+ * Sort the l parsed entries in a[] (lexeme text lives in buf) and merge
+ * duplicates of the same lexeme in place, concatenating and de-duplicating
+ * their position lists.
+ *
+ * Returns the number of distinct entries left at the front of a[].
+ * *outbuflen is set to the exact number of data bytes the entries need in
+ * the final value: SHORTALIGN(len) for every lexeme plus, for entries with
+ * positions, (count + 1) WordEntryPos slots (slot 0 holds the count).
+ * The value is computed from zero, so whatever the caller left in
+ * *outbuflen does not leak into the result.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   *outbuflen = 0;
+   if (l <= 0)
+       return 0;
+
+   res = a;
+   if (l == 1) {
+       *outbuflen = SHORTALIGN(a->entry.len);
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen += (*(uint16*)(a->pos) + 1) * sizeof(WordEntryPos);
+       }
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;    /* compareentry() reads the lexeme text through this */
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* new lexeme: finish accounting for the previous one, then keep this one */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* duplicate lexeme carrying positions: append them to the kept entry */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* kept entry had no positions yet: adopt the duplicate's array */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* account for the last kept entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/* States of the gettoken_tsvector() parser (stored in state->state). */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Double the state->word buffer when fewer than two free bytes remain past
+ * state->curpos, keeping curpos at the same offset in the new buffer.
+ * Expects a variable named "state" in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Read the next lexeme from state->prsbuf and, unless state->oprisdelim is
+ * set, its optional ":pos[weight],..." list.
+ *
+ * The unescaped lexeme is stored NUL-terminated in state->word (grown with
+ * RESIZEPRSBUF); state->curpos is left on the terminator.  When a position
+ * list was parsed, state->alen is non-zero and state->pos is a palloc'd
+ * array whose slot 0, read as uint16, holds the number of positions that
+ * follow it.
+ *
+ * Returns 1 when a token was read and 0 at end of input; malformed input is
+ * reported through elog(ERROR).
+ *
+ * Characters are cast to unsigned char before being handed to the <ctype.h>
+ * functions, whose behaviour is undefined for negative char values.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           /* character after a backslash is taken literally */
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           /* inside a single-quoted lexeme */
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               /* slot 0 of state->pos holds the count, so one extra slot is always needed */
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           /* remaining digits of the number atoi() already consumed fall through untouched */
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function: parse the text form of a tsvector.
+ *
+ * Tokens from gettoken_tsvector() are collected into a WordEntryIN array,
+ * with lexeme bytes in tmpbuf; uniqueentry() then sorts and merges
+ * duplicates, and the result is packed as header, WordEntry array, and
+ * lexeme/position data.
+ *
+ * Offsets are stored in the 20-bit WordEntry.pos field, so any offset that
+ * reaches MAXSTRPOS is rejected, both in the scratch buffer and in the
+ * final value.  An input without lexemes yields an empty value with no
+ * data area.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       /* make room in the scratch buffer for this lexeme */
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /* the largest offset the pos bitfield can hold is MAXSTRPOS-1 */
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* empty value: no data area */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /* final offsets include position data, so check them again */
+       if (cur - STRPTR(in) >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* slot 0 of the array is the count, hence the "+ 1" */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * Return the number of entries stored in a tsvector (its "size" field).
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before the detoasted copy may be released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function: render a tsvector as text in the form
+ *     'lexeme':pos[weight],pos... 'lexeme' ...
+ * A single quote inside a lexeme is written preceded by a backslash.
+ * Weights 3, 2 and 1 are printed as A, B and C; weight 0 prints nothing.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+       /* upper bound of the output size: quotes, separators, terminator ... */
+       lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+       for (i = 0; i < out->size; i++) {
+               lenbuf += ptr[i].len*2 /*for escape */;
+               /* ... and 7 bytes reserved per position entry */
+               if ( ptr[i].haspos )
+                       lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+       }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'')
+           {
+               /* grow by one byte for the backslash; keep the write offset */
+               int4        pos = curout - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               /* advance to the terminator sprintf() just wrote */
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD: order by length, then by strncmp() of the
+ * word bytes, then by position (equal words never compare as 0).
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int         cmp;
+
+   if ( wa->len != wb->len )
+       return ( wa->len > wb->len ) ? 1 : -1;
+
+   cmp = strncmp(wa->word, wb->word, wb->len);
+   if ( cmp != 0 )
+       return cmp;
+
+   return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l parsed words in a[] and merge equal words in place.
+ * For every kept word pos.pos is replaced by a palloc'd uint16 array
+ * pos.apos, where apos[0] is the number of positions and apos[1..] are the
+ * positions (each clamped by LIMITPOS); alen is the allocated array length.
+ * The word buffers of merged duplicates are pfree'd.
+ * Returns the number of distinct words left at the front of a[].
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   if (l == 1) {
+       /* read pos.pos before pos.apos is assigned */
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   /* convert the first word's single position into a position array */
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* a new word: keep it and start its position array */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* same word again: drop its text, append its position unless a limit was hit */
+           pfree(ptr->word);
+           if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
+               res->pos.apos[0]++; 
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are first sorted and merged by uniqueWORD(); the result is then
+ * packed as header, WordEntry array, and per-word data (lexeme bytes padded
+ * with SHORTALIGN, followed by a uint16 count and the positions, all with
+ * weight 0).  Word buffers, position arrays and prs->words are pfree'd.
+ *
+ * Raises an error when a data offset no longer fits the 20-bit
+ * WordEntry.pos field, i.e. when it reaches MAXSTRPOS.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /* the largest offset the pos bitfield can hold is MAXSTRPOS-1 */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* count first, then the positions themselves */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg id, text): parse the text with the given configuration
+ * and return the resulting tsvector; text that yields no words gives an
+ * empty tsvector.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *in = PG_GETARG_TEXT_P(1);
+   PRSTEXT     prs;
+   tsvector       *result;
+   TSCfgInfo *cfg=findcfg(PG_GETARG_INT32(0)); 
+
+   /* start with room for 32 words */
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+   PG_FREE_IF_COPY(in, 1);
+
+   if (prs.curwords == 0) {
+       /* nothing parsed: header-only value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+       PG_RETURN_POINTER(result);
+   }
+
+   result = makevalue(&prs);
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg name, text): resolve the configuration name with
+ * name2id_cfg() and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * to_tsvector(text): delegate to to_tsvector() using the configuration id
+ * returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is text.
+ * Returns the oid of the first such candidate, or InvalidOid when there is
+ * none.  The candidate list is pfree'd on the way.
+ */
+static Oid
+findFunc(char *fname) {
+   List       *qualname = makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(qualname, 1);
+   Oid         found = InvalidOid;
+
+   freeList(qualname);
+
+   while ( cand != NULL ) {
+       FuncCandidateList next = cand->next;
+
+       /* remember only the first match, but still walk on to free the rest */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger function: tsearch2(tsvector_field, text_field1, ...)
+ *
+ * Must be fired as a row-level BEFORE INSERT or UPDATE trigger.  The first
+ * trigger argument names the column that receives the tsvector.  Every
+ * further argument is either a column name, whose value is parsed and added
+ * to the word list, or - when no column has that name - the name of a
+ * function taking text, which is then applied to the columns named after
+ * it.  Returns the modified tuple.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name; it stays in effect for later columns */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /* no detoasted copy was made: make the pfree test below skip txt */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when it is a separate detoasted copy */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words found: store an empty (header-only) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Directory entry for one lexeme of a tsvector, packed into 32 bits.
+ */
+typedef struct {
+   uint32
+       haspos:1,   /* non-zero when position data follows the lexeme */
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* first values that no longer fit the len / pos bitfields above */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme: a 2-bit weight and a 14-bit position,
+ * packed into 16 bits.
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+/* first value that no longer fits the pos bitfield above */
+#define MAXENTRYPOS    (1<<14)
+/* limit on the number of positions kept per lexeme */
+#define MAXNUMPOS  256
+/* clamp x to the largest storable position, MAXENTRYPOS-1 */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * The tsvector value: two int4 header fields followed by "size" WordEntry
+ * items and then the lexeme/position data they point into.
+ */
+typedef struct
+{
+   int4        len;    /* total size of the value in bytes */
+   int4        size;   /* number of WordEntry items */
+   char        data[1];    /* start of the variable-length part */
+}  tsvector;
+
+/*
+ * Layout helpers for a tsvector.  Every macro argument is parenthesised so
+ * that expressions (e.g. "cur-data" or "a + b") can be passed safely.
+ *
+ * DATAHDRSIZE    size of the two int4 header fields
+ * CALCDATASIZE   total size for x entries plus lenstr bytes of data
+ * ARRPTR         pointer to the WordEntry array
+ * STRPTR         pointer to the data area behind the WordEntry array
+ * STRSIZE        size of that data area in bytes
+ * _POSDATAPTR    pointer to the uint16 position count stored after entry
+ *                e's (SHORTALIGN-padded) lexeme
+ * POSDATALEN     number of positions of entry e, 0 when it has none
+ * POSDATAPTR     pointer to the first WordEntryPos of entry e
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Entry as collected while parsing input: the packed WordEntry plus a
+ * separately allocated position array (slot 0, read as uint16, holds the
+ * number of positions that follow).
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * State of the gettoken_tsvector() tokenizer.
+ */
+typedef struct
+{
+   char       *prsbuf;     /* current read position in the input string */
+   char       *word;       /* buffer receiving the current lexeme */
+   char       *curpos;     /* write position inside word */
+   int4        len;        /* allocated size of word */
+   int4        state;      /* current parser state */
+   int4        alen;       /* allocated slots in pos; 0 when no positions were read */
+   WordEntryPos    *pos;   /* positions of the current lexeme, count in slot 0 */
+   bool        oprisdelim; /* operator characters end a word; no position list is parsed */
+}  TI_IN_STATE;
+
+/* returns 1 when a token was read into state, 0 at end of input */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the value that keeps every lexeme but
+ * drops all position information.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* data area of the result: just the padded lexemes */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char"): return a copy of the value in which every
+ * position carries the given weight.  The weight letter is matched case-
+ * insensitively: a=3, b=2, c=1, d=0; anything else raises an error.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* tolower() is only defined for unsigned char values (and EOF) */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;    
+   while(i--) {
+       /* entries without position data are left untouched */
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two lexemes that live in different data areas (ptra / ptrb):
+ * shorter first, equal lengths by strncmp() of the bytes.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of entry srcptr (in src) to the position list of
+ * entry destptr (in dest), shifting each by maxpos and clamping it with
+ * LIMITPOS.  Source weights are kept.  Copying stops when the destination
+ * holds MAXNUMPOS positions or its last position is already the largest
+ * storable one (MAXENTRYPOS-1).
+ *
+ * If destptr has no positions yet its count is started at 0; haspos is set
+ * once at least one position was added.  Returns the number of positions
+ * added.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector): merge two tsvectors.
+ *
+ * The entries of both inputs are merged in compareEntry() order.  Entries
+ * of in1 are copied as they are; positions coming from in2 are shifted by
+ * the largest position found in in1 (via add_pos).  A lexeme present in
+ * both inputs yields one entry carrying the positions of both.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* allocate for the case that no lexeme is shared; trimmed at the end */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one entry, positions of in1 then in2 */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count slot was already copied from in1, so only the added positions take space */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* remaining entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* remaining entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /* shrink to the entries actually written; STRPTR() depends on size, so the data may have to move */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+-- Uninstall script for the tsearch2 module.
+-- Everything below runs inside a single transaction (BEGIN ... END).
+BEGIN;
+
+-- WARNING: be careful!
+-- This script drops all indices, triggers and columns that use the types
+-- defined in tsearch2.sql (the DROP ... CASCADE statements below remove
+-- dependent objects as well).
+
+
+-- GiST operator class
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregate
+DROP AGGREGATE stat(tsvector);
+
+-- tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types, together with everything that depends on them
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- functions
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the token id
+ * from deflex.h (1..LASTNUM).  Index 0 is an empty placeholder because
+ * token ids start at 1.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token id from deflex.h
+ * (1..LASTNUM).  Index 0 is an empty placeholder because token ids start
+ * at 1.  The entries are in the same order as lex_descr[].
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Remember: LASTNUM is the highest token id defined below; keep it in
+ * sync (together with lex_descr[] and tok_alias[]) when adding a token type.
+ */
+#define LASTNUM        23
+
+/* token ids; also used as indexes into lex_descr[] and tok_alias[] */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* per-token description and alias tables, defined in deflex.c */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Interface of the flex-based default word parser (parser.l).
+ *
+ * NOTE(review): token and tokenlen are written here without "extern", so
+ * every file that includes this header gets its own tentative definition
+ * of them -- consider declaring them extern and defining them once in
+ * parser.l.  The prototypes below use FILE, so <stdio.h> (or a header that
+ * pulls it in) must be included before this file.
+ */
+
+/* text and length of the token most recently returned by tsearch2_yylex() */
+char      *token;
+int            tokenlen;
+/* scan the next token; the result is a token id from deflex.h */
+int            tsearch2_yylex(void);
+/* begin scanning a string / a file handle; the int argument limits the input length */
+void       start_parse_str(char *, int);
+void       start_parse_fh(FILE *, int);
+/* release the scanner state set up by start_parse_*() */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: any three-argument
+ * fprintf(file, fmt, msg) call in the scanner becomes
+ * ts_error(ERROR, fmt, msg); the file argument is discarded.
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* make the scanner allocate through the postgres allocation functions */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the text of the current token */
+char *s     = NULL;  /* strdup'ed copy used to return a WHOLE compound token
+                      * (hyphenated word or URL) before its parts are re-scanned */
+
+YY_BUFFER_STATE buf = NULL; /* current scanner buffer; created by start_parse_str()
+                             * or start_parse_fh(), destroyed by end_parse() */
+
+int lrlimit = -1;  /* for limiting read from filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next fread() issued by YY_INPUT */
+
+/*
+ * Redefine YY_INPUT to support a limit on the number of bytes read.
+ *
+ * Interactive buffers are read character by character up to a newline.
+ * Otherwise: lrlimit == 0 reports end of input (YY_NULL); lrlimit > 0 reads
+ * at most lrlimit bytes and decreases lrlimit by the amount requested;
+ * lrlimit < 0 reads max_size bytes.
+ *
+ * NOTE: the fread() test is indented as if it belonged to the final
+ * "else bytestoread = max_size;" but it is a separate statement and runs in
+ * both the limited and the unlimited case.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* exclusive state: re-scanning the parts of a hyphenated word */
+%x DELIM  
+/* exclusive states: parsing a URL */
+%x URL  
+%x SERVER  
+
+/* exclusive states: parsing markup (tags, quoted text inside tags, comments, scripts) */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* non-latin characters: any byte in the range \200-\377, e.g. cyrillic koi8 chars */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+/* one or more dot-terminated labels followed by an alphabetic last label */
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+/* characters accepted after the host part of a URL */
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after parsing: drop the saved copy of a compound token, if
+ * there is one, and destroy the current scanner buffer.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free( s );
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Start parsing the first "limit" bytes of "str".  A parse that is still
+ * open is closed first; the scanner begins in the INITIAL state.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL )
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+/*
+ * Start parsing from the file handle "fh".  A non-zero "limit" caps the
+ * number of bytes read (see lrlimit); zero means unlimited.  A parse that
+ * is still open is closed first; the scanner begins in the INITIAL state.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL )
+       end_parse();
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+/* saved SPI plan used by init_prs(); prepared on first use */
+static void *plan_getparser=NULL;
+/* oid of the current parser; InvalidOid until it is set or first needed */
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Load the description of parser "id" into *prs.
+ *
+ * *prs is zeroed first.  The parser's row is read from pg_ts_parser through
+ * SPI (the plan is prepared and saved on first use); the start, nexttoken,
+ * end and headline functions are looked up with fmgr_info_cxt() in
+ * TopMemoryContext, and the oid of the lextype function is stored as is.
+ * An error is reported through ts_error(ERROR, ...) if the plan cannot be
+ * prepared, the query fails, or no parser with that oid exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       /* result columns 1..5: prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       /* prs_id is set last, only after every lookup succeeded */
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned: print it with %u so large oids do not show up negative */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/*
+ * Cache of the parsers loaded so far (see findprs() and name2id_prs()).
+ */
+typedef struct {
+   WParserInfo *last_prs;  /* most recently returned entry, or NULL */
+   int     len;            /* number of entries in use */
+   int     reallen;        /* number of entries allocated */
+   WParserInfo *list;      /* entries, kept sorted by prs_id (compareprs) */
+   SNMap       name2id_map;    /* parser name -> oid map */
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget every cached parser: release the name -> oid map and the entry
+ * array, then return PList to its all-zero initial state.
+ */
+void    
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL ) {
+       free( PList.list );
+   }
+
+   memset( &PList, 0, sizeof(PrsList) );
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is unsigned,
+ * and converting the difference of two unsigned values to int yields the
+ * wrong sign when the ids are more than INT_MAX apart.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   return ( ida > idb ) ? 1 : 0;
+}
+
+/*
+ * Return the cached description of parser "id", loading it with init_prs()
+ * on first use.
+ *
+ * Lookup order: the most recently returned entry, then a binary search of
+ * the sorted cache, and finally a load into a new slot (the array is
+ * doubled when full, starting at 16 entries) followed by a re-sort.
+ *
+ * init_prs() reports an unknown id through ts_error(ERROR, ...).
+ * PList.last_prs is therefore never pointed at the new slot before the
+ * load has succeeded: otherwise a failed load would leave it referring to a
+ * zero-filled entry (prs_id == 0, no function info) that a later
+ * findprs(0) would hand out as if it were valid.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo key;
+   WParserInfo *slot;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* only prs_id is read by compareprs */
+   key.prs_id=id;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* last chance: load the parser, growing the array first if it is full */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /* no cached pointer while the slot is not yet a registered entry */
+   PList.last_prs=NULL;
+   slot=&(PList.list[PList.len]);
+   init_prs(id, slot);
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+
+   /* qsort changed order!! look the entry up again */
+   PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return PList.last_prs;
+}
+
+/* saved SPI plan used by name2id_prs(); prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * Names already resolved are answered from PList.name2id_map; otherwise the
+ * table is queried through SPI and the result is added to the map.  An
+ * unknown name is reported through ts_error(ERROR, ...).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   bool isnull;
+   Datum argvals[1]={ PointerGetDatum(name) };
+   int rc;
+   Oid id=findSNMap_t( &(PList.name2id_map), name );
+
+   /* already known */
+   if ( id != 0 )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       void *fresh = SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes );
+
+       plan_name2id = SPI_saveplan( fresh );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, argvals, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed > 0 ) {
+       Datum col = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull);
+
+       id = DatumGetObjectId( col );
+   } else {
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   }
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* per-query state of the token_type*() set-returning functions */
+typedef struct {
+   int     cur;    /* index of the next entry of list to return */
+   LexDescr    *list;  /* token descriptions; the end is marked by lexid == 0 */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type*() functions: fetch the token
+ * description list of parser "prsid" by calling its lextype function, and
+ * prepare the "tokentype" tuple slot and attribute metadata.  Everything is
+ * allocated in the multi-call memory context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo *parser = findprs(prsid);
+   MemoryContext saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   TypeStorage *state = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   TupleDesc   rowdesc;
+   Datum       types;
+
+   state->cur = 0;
+   types = OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) );
+   state->list = (LexDescr*)DatumGetPointer( types );
+   funcctx->user_fctx = (void*)state;
+
+   rowdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+/*
+ * Per-call worker shared by the token_type*() functions.  Returns the next
+ * (id, alias, description) row as a tuple datum and frees that entry's
+ * strings; once the list is exhausted it frees the state and returns
+ * (Datum) 0.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *state = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char        *fields[3];
+   char        idbuf[16];
+   HeapTuple   row;
+   Datum       out;
+
+   /* no list, or end marker reached: release the state and finish */
+   if ( !state->list || !state->list[state->cur].lexid ) {
+       if ( state->list ) pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   sprintf(idbuf,"%d",entry->lexid);
+   fields[0] = idbuf;
+   fields[1] = entry->alias;
+   fields[2] = entry->descr;
+
+   row = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   out = TupleGetDatum(funcctx->slot, row);
+
+   /* the strings are copied into the tuple, so they can go now */
+   pfree(fields[1]);
+   pfree(fields[2]);
+   state->cur++;
+   return out;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: the token types of the parser whose oid is
+ * argument 0, one row per call.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext *ctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       ctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(ctx, PG_GETARG_OID(0));
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = process_call(ctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: the token types of the parser whose name is
+ * argument 0, one row per call.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext *ctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *prsname = PG_GETARG_TEXT_P(0);
+       Oid     prsid;
+
+       ctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( prsname );
+       setup_firstcall(ctx, prsid);
+       PG_FREE_IF_COPY(prsname,0);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = process_call(ctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: the token types of the current parser, one row
+ * per call.  When no current parser has been chosen yet, the parser named
+ * "default" becomes the current one.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext *ctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       ctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       setup_firstcall(ctx, current_parser_id);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = process_call(ctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+/*
+ * Make the parser whose oid is argument 0 the current parser.  The parser
+ * is looked up (and loaded if necessary) with findprs() first, so the
+ * current parser id only changes when that lookup succeeds.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid     prsid = PG_GETARG_OID(0);
+
+   findprs(prsid);
+   current_parser_id = prsid;
+
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+/*
+ * Make the parser whose name is argument 0 the current parser: resolve the
+ * name to an oid and delegate to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+   text    *prsname = PG_GETARG_TEXT_P(0);
+   Datum   prsid = ObjectIdGetDatum( name2id_prs(prsname) );
+
+   DirectFunctionCall1( set_curprs, prsid );
+
+   PG_FREE_IF_COPY(prsname, 0);
+   PG_RETURN_VOID();
+}
+
+/* one token produced by a parser */
+typedef struct {
+   int type;       /* token type id returned by the parser */
+   char    *lexem; /* NUL-terminated copy of the token text */
+} LexemEntry;
+
+/* per-query state of the parse*() set-returning functions */
+typedef struct {
+   int cur;        /* index of the next entry of list to return */
+   int len;        /* allocated size while collecting, number of tokens afterwards */
+   LexemEntry  *list;  /* collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse*() functions: run parser "prsid"
+ * over the whole of "txt", collect every token it returns, and prepare the
+ * "tokenout" tuple slot and attribute metadata.
+ *
+ * The token list and the token copies are allocated after the switch to the
+ * multi-call memory context, so they stay available for the later calls.
+ *
+ * NOTE(review): prsid is declared int, while the callers pass an Oid
+ * (PG_GETARG_OID() or the result of name2id_prs()).
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, int prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text payload; its result is kept in prs->prs */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* fetch tokens until the parser returns type 0; lex/llen receive text and length */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* double the list when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* keep a NUL-terminated copy of the llen bytes at lex */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   /* tell the parser we are done */
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the number of tokens and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the parse*() functions.  Returns the next
+ * (type id, token text) row as a tuple datum and frees that token's text;
+ * once all tokens have been returned it frees the state and returns
+ * (Datum) 0.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *state = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char        *fields[2];
+   char        typebuf[16];
+   HeapTuple   row;
+   Datum       out;
+
+   /* nothing left: release the state and finish */
+   if ( !( state->cur < state->len ) ) {
+       if ( state->list ) pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   sprintf(typebuf,"%d",entry->type);
+   fields[0] = typebuf;
+   fields[1] = entry->lexem;
+
+   row = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   out = TupleGetDatum(funcctx->slot, row);
+
+   /* the text is copied into the tuple, so it can go now */
+   pfree(fields[1]);
+   state->cur++;
+   return out;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: the tokens of the text in argument 1, produced
+ * by the parser whose oid is argument 0, one row per call.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext *ctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *input = PG_GETARG_TEXT_P(1);
+
+       ctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(ctx, PG_GETARG_OID(0), input);
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(ctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: the tokens of the text in argument 1, produced
+ * by the parser whose name is argument 0, one row per call.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext *ctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *prsname = PG_GETARG_TEXT_P(0);
+       text    *input = PG_GETARG_TEXT_P(1);
+       Oid     prsid;
+
+       ctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( prsname );
+       prs_setup_firstcall(ctx, prsid, input);
+       PG_FREE_IF_COPY(prsname,0);
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(ctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: the tokens of the text in argument 0, produced
+ * by the current parser, one row per call.  When no current parser has been
+ * chosen yet, the parser named "default" becomes the current one.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext *ctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *input = PG_GETARG_TEXT_P(0);
+
+       ctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       prs_setup_firstcall(ctx, current_parser_id, input);
+       PG_FREE_IF_COPY(input,0);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(ctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * Build a headline for a text.
+ *
+ * Arguments: 0 - oid of the configuration, 1 - the text, 2 - the query,
+ * 3 - optional options text (treated as absent when the argument is
+ * missing or is a NULL pointer, which is how headline_byname() and
+ * headline_current() pass "no options").
+ *
+ * The text is parsed with hlparsetext(), the headline function of the
+ * configuration's parser is called on the result, and genhl() produces the
+ * returned text.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   /* start from an all-zero parse state with room for 32 words */
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   /* let the parser's headline function work on the parsed text */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /*
+    * NOTE(review): startsel/stopsel are zero after the memset above and are
+    * never assigned in this function; presumably the headline function
+    * always sets them -- confirm, since they are freed unconditionally.
+    */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+/*
+ * headline() for a configuration given by name (argument 0).  Arguments 1
+ * and 2 are passed through; argument 3 is passed through when present and
+ * replaced by a NULL pointer otherwise.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text    *cfgname = PG_GETARG_TEXT_P(0);
+   Datum   cfgid = ObjectIdGetDatum( name2id_cfg( cfgname ) );
+   Datum   options;
+   Datum   out;
+
+   if ( PG_NARGS()>3 )
+       options = PG_GETARG_DATUM(3);
+   else
+       options = PointerGetDatum(NULL);
+
+   out = DirectFunctionCall4(
+       headline,
+       cfgid,
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       options
+   );
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(out);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+/*
+ * headline() for the current configuration.  Arguments 0 and 1 are passed
+ * through as text and query; argument 2 is passed through as options when
+ * present and replaced by a NULL pointer otherwise.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum   cfgid = ObjectIdGetDatum(get_currcfg());
+   Datum   options;
+   Datum   out;
+
+   if ( PG_NARGS()>2 )
+       options = PG_GETARG_DATUM(2);
+   else
+       options = PointerGetDatum(NULL);
+
+   out = DirectFunctionCall4(
+       headline,
+       cfgid,
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       options
+   );
+
+   PG_RETURN_DATUM(out);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* description of one parser, filled in by init_prs() from its pg_ts_parser row */
+typedef struct {
+   Oid prs_id;                 /* oid of the pg_ts_parser row */
+   FmgrInfo start_info;        /* prs_start function */
+   FmgrInfo getlexeme_info;    /* prs_nexttoken function */
+   FmgrInfo end_info;          /* prs_end function */
+   FmgrInfo headline_info;     /* prs_headline function */
+   Oid lextype;                /* oid of the prs_lextype function */
+   void *prs;                  /* value returned by the start function */
+} WParserInfo;
+
+/* load parser "id" into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached description of parser "id", loading it on first use */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* drop all cached parser descriptions */
+void   reset_prs(void);
+
+
+/* one token type as reported by a parser's lextype function; lexid == 0 ends a list */
+typedef struct {
+   int lexid;
+   char    *alias;
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_lextype
+ *   Build a palloc'd table describing token types 1..LASTNUM of the
+ *   default parser.  Slot k-1 describes token type k and holds private
+ *   copies of tok_alias[k] and lex_descr[k]; one extra slot with lexid 0
+ *   closes the table.
+ */
+Datum
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr    *table;
+   int         slot;
+
+   table = (LexDescr*) palloc( sizeof(LexDescr) * (LASTNUM+1) );
+
+   for ( slot=0; slot<LASTNUM; slot++ ) {
+       LexDescr    *entry = &table[slot];
+       int         tokid = slot + 1;
+
+       entry->lexid = tokid;
+       entry->alias = pstrdup(tok_alias[tokid]);
+       entry->descr = pstrdup(lex_descr[tokid]);
+   }
+
+   /* terminator: only lexid is assigned, alias and descr stay unset */
+   table[LASTNUM].lexid = 0;
+
+   PG_RETURN_POINTER(table);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_start
+ *   Hand the text buffer (argument 0) and the int32 passed with it
+ *   (argument 1) to start_parse_str().  Always returns a NULL pointer.
+ */
+Datum
+prsd_start(PG_FUNCTION_ARGS) {
+   char    *buf = (char*) PG_GETARG_POINTER(0);
+   int32   buflen = PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_getlexeme
+ *   Run tsearch2_yylex() once and report the result: the value it returns
+ *   is the function result, while the globals 'token' and 'tokenlen' are
+ *   stored through the pointers passed as arguments 1 and 2.
+ *   Argument 0 is not read.
+ */
+Datum
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char    **tokptr = (char**) PG_GETARG_POINTER(1);
+   int     *lenptr = (int*) PG_GETARG_POINTER(2);
+   int     toktype;
+
+   /* the lexer must run before token/tokenlen are read */
+   toktype = tsearch2_yylex();
+
+   *tokptr = token;
+   *lenptr = tokenlen;
+
+   PG_RETURN_INT32(toktype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_end
+ *   Finish the current parse by calling end_parse().  Argument 0 is not
+ *   read and nothing is returned.
+ */
+Datum
+prsd_end(PG_FUNCTION_ARGS)
+{
+   end_parse();
+
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class tests.  The numbers are token type ids; in the
+ * token_type('default') listing they are named:
+ *    5 url     7 sfloat   8 version  12 blank   13 tag    14 http
+ *   15 hword  16 lhword  17 nlhword  20 float   21 int    22 uint
+ *   23 entity
+ */
+/* blank only */
+#define LEAVETOKEN(x)  ( (x)==12 )
+/* url and the three hyphenated-word types */
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+/* blank only (same test as LEAVETOKEN) */
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+/* tag, http, blank, entity */
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* url, tag and hyphenated words; prsd_headline() sets 'replace' on these */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* blank or HLIDIGNORE; prsd_headline() does not count these as words */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* NONWORDTOKEN, the numeric types or IDIGNORE; prsd_headline() tries not to end a headline on one */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* Window of headline words searched by checkcondition_HL(). */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * checkcondition_HL
+ *   Callback given to TS_execute() by hlCover().  'checkval' points to an
+ *   hlCheck window; the result is true when one of its 'len' words refers
+ *   (by pointer identity) to the query item 'val'.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window = (hlCheck*) checkval;
+   int     idx = 0;
+
+   while ( idx < window->len ) {
+       if ( window->words[idx].item == val )
+           return true;
+       idx++;
+   }
+
+   return false;
+}
+
+
+/*
+ * hlCover
+ *   Find the next range of words [*p, *q] that satisfies the query.
+ *
+ *   prs   - parsed document (words[0 .. curwords-1])
+ *   query - query whose VAL items are matched against words[].item
+ *   p     - in: index at which to start looking; out: left edge of cover
+ *   q     - out: right edge of cover
+ *
+ *   Returns true when a range was found for which TS_execute() succeeds,
+ *   false otherwise (*p and *q are then not meaningful).
+ *
+ *   Note that 0 doubles as the "nothing found" value of *q, so a cover
+ *   whose right edge would be word 0 is reported as no cover.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /*
+    * Right edge: for every VAL item take its first occurrence at or after
+    * pos; *q becomes the largest of those positions.
+    */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q==0 )
+       return false;
+
+   /*
+    * Left edge: for every VAL item take its last occurrence in [pos, *q];
+    * *p becomes the smallest of those positions.
+    */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       /* let the query itself decide whether words[*p .. *q] is a match */
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* not a real cover: retry, starting one word further right */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_headline
+ *   Choose which words of a parsed document make up the headline.
+ *
+ *   arg 0: HLPRSTEXT* - parsed document; the flags of its words[] are
+ *          updated and startsel/stopsel (with their lengths) are filled in
+ *   arg 1: text*      - option string, or NULL to use every default
+ *   arg 2: QUERYTYPE* - query whose terms are to be highlighted
+ *
+ *   Options (keys compared case-insensitively): MaxWords, MinWords,
+ *   ShortWord, StartSel, StopSel; the defaults are 35, 15, 3, "<b>" and
+ *   "</b>".  An error is raised unless 0 < MinWords < MaxWords and
+ *   ShortWord >= 0.
+ *
+ *   Returns arg 0.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults, overridable through opt (as are the start and end tags) */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;  /* first and last word of the best fragment */
+   int bestlen=-1;         /* its number of distinct query hits, <0 = none yet */
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       /* the map ends at the first entry without a key */
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   /* examine every cover of the query and remember the best fragment */
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       /* bestlen<0 makes the first test false, so words[beste] is only read once beste is valid */
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this fragment when it is the first one, when it has more
+        * hits and a good end, or when it has a good end and the best so
+        * far does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   /* no cover at all: fall back to the first min_words words of the text */
+   if ( bestlen<0 ) {
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen fragment */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* default highlighting tags when the options did not name any */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom}}
+>over₁♦_{12
+>100₂₂♦_{12
+>feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')

+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and

+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+}

diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html

new file mode 100644 (file)

index 0000000..df0faa4


--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-ref.html
@@ -0,0 +1,448 @@
+
+
+
+
+tsearch2 reference
+
+
+The tsearch2 Reference
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Reference documents the user types and functions
+of the tsearch2 module for PostgreSQL.
+An introduction to the module is provided
+by the tsearch2 Guide,
+a companion document to this one.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+Vectors and Queries
+
+Vectors and queries both store lexemes,
+but for different purposes.
+A tsvector stores the lexemes
+of the words that are parsed out of a document,
+and can also remember the position of each word.
+A tsquery specifies a boolean condition among lexemes.
+
+Any of the following functions with a configuration argument
+can use either an integer id or textual ts_name
+to select a configuration;
+if the option is omitted, then the current configuration is used.
+For more information on the current configuration,
+read the next section on Configurations.
+
+Vector Operations
+
+
+
+ to_tsvector( [configuration,]

+ document TEXT) RETURNS tsvector
+
+ Parses a document into tokens,
+ reduces the tokens to lexemes,
+ and returns a tsvector which lists the lexemes
+ together with their positions in the document.
+ For the best description of this process,
+ see the section on Parsing and Stemming
+ in the accompanying tsearch2 Guide.
+
+ strip(vector tsvector) RETURNS tsvector
+
+ Return a vector which lists the same lexemes
+ as the given vector,
+ but which lacks any information
+ about where in the document each lexeme appeared.
+ While the returned vector is thus useless for relevance ranking,
+ it will usually be much smaller.
+
+ setweight(vector tsvector, letter) RETURNS tsvector
+
+ This function returns a copy of the input vector
+ in which every location has been labelled
+ with either the letter
+ 'A', 'B', or 'C',
+ or the default label 'D'
+ (which is the default with which new vectors are created,
+ and as such is usually not displayed).
+ These labels are retained when vectors are concatenated,
+ allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+
+ vector1 || vector2
+
+ concat(vector1 tsvector, vector2 tsvector)

+ RETURNS tsvector
+
+ Returns a vector which combines the lexemes and position information
+ in the two vectors given as arguments.
+ Position weight labels (described in the previous paragraph)
+ are retained intact during the concatenation.
+ This has at least two uses.
+ First,
+ if some sections of your document
+ need be parsed with different configurations than others,
+ you can parse them separately
+ and concatenate the resulting vectors into one.
+ Second,
+ you can weight words from some sections of your document
+ more heavily than those from others by:
+ parsing the sections into separate vectors;
+ assigning the vectors different position labels
+ with the setweight() function;
+ concatenating them into a single vector;
+ and then providing a weights argument
+ to the rank() function
+ that assigns different weights to positions with different labels.
+
+ tsvector_size(vector tsvector) RETURNS INT4
+
+ Returns the number of lexemes stored in the vector.
+
+ text::tsvector RETURNS tsvector
+
+ Directly casting text to a tsvector
+ allows you to directly inject lexemes into a vector,
+ with whatever positions and position weights you choose to specify.
+ The text should be formatted
+ like the vector would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Query Operations
+
+
+
+ to_tsquery( [configuration,]

+ querytext text) RETURNS tsquery
+
+ Parses a query,
+ which should be single words separated by the boolean operators
+ “&” and,
+ “|” or,
+ and “!” not,
+ which can be grouped using parentheses.
+ Each word is reduced to a lexeme using the current
+ or specified configuration.
+
+
+ querytree(query tsquery) RETURNS text
+
+ This might return a textual representation of the given query.
+
+ text::tsquery RETURNS tsquery
+
+ Directly casting text to a tsquery
+ allows you to directly inject lexemes into a query,
+ with whatever positions and position weight flags you choose to specify.
+ The text should be formatted
+ like the query would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Configurations
+
+A configuration specifies all of the equipment necessary
+to transform a document into a tsvector:
+the parser that breaks its text into tokens,
+and the dictionaries which then transform each token into a lexeme.
+Every call to to_tsvector() (described above)
+uses a configuration to perform its processing.
+Three configurations come with tsearch2:
+
+
+default — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the simple dictionary for all others.
+default_russian — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the ru_stem Russian Snowball dictionary for all others.
+simple — Processes both words and numbers
+ with the simple dictionary,
+ which neither discards any stop words nor alters them.
+
+
+The tsearch2 module initially chooses your current configuration
+by looking for your current locale in the locale field
+of the pg_ts_cfg table described below.
+You can manipulate the current configuration yourself with these functions:
+
+
+
+ set_curcfg( id INT | ts_name TEXT

+  ) RETURNS VOID
+
+ Set the current configuration used by to_tsvector
+ and to_tsquery.
+
+ show_curcfg() RETURNS INT4
+
+ Returns the integer id of the current configuration.
+
+
+
+Each configuration is defined by a record in the pg_ts_cfg table:
+
+create table pg_ts_cfg (
+   id      int not  null primary key,
+   ts_name     text not null,
+   prs_name    text not null,
+   locale      text
+);
+
+The id and ts_name are unique values
+which identify the configuration;
+the prs_name specifies which parser the configuration uses.
+Once this parser has split document text into tokens,
+the type of each resulting token —
+or, more specifically, the type's lex_alias
+as specified in the parser's lexem_type() table —
+is searched for together with the configuration's ts_name
+in the pg_ts_cfgmap table:
+
+create table pg_ts_cfgmap (
+   ts_name     text not null,
+   lex_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,lex_alias)
+);
+
+Those tokens whose types are not listed are discarded.
+The remaining tokens are assigned integer positions,
+starting with 1 for the first token in the document,
+and turned into lexemes with the help of the dictionaries
+whose names are given in the dict_name array for their type.
+These dictionaries are tried in order,
+stopping either with the first one to return a lexeme for the token,
+or discarding the token if no dictionary returns a lexeme for it.
+
+Parsers
+
+Each parser is defined by a record in the pg_ts_parser table:
+
+create table pg_ts_parser (
+   prs_id      int not null primary key,
+   prs_name    text not null,
+   prs_start   oid not null,
+   prs_getlexem    oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+);
+
+The prs_id and prs_name uniquely identify the parser,
+while prs_comment usually describes its name and version
+for the reference of users.
+The other items identify the low-level functions
+which make the parser operate,
+and are only of interest to someone writing a parser of their own.
+
+The tsearch2 module comes with one parser named default
+which is suitable for parsing most plain text and HTML documents.
+
+Each parser argument below
+must designate a parser with either an integer prs_id
+or a textual prs_name;
+the current parser is used when this argument is omitted.
+
+
+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID
+
+ Selects a current parser
+ which will be used when any of the following functions
+ are called without a parser as an argument.
+
+ CREATE FUNCTION lexem_type(

+  [ parser ]
+  ) RETURNS SETOF lexemtype
+
+ Returns a table which defines and describes
+ each kind of token the parser may produce as output.
+ For each token type the table gives the lexid
+ with which the parser will label each token of that type,
+ the alias which names the token type,
+ and a short description descr for the user to read.
+
+ CREATE FUNCTION parse(

+  [ parser, ] document TEXT
+  ) RETURNS SETOF lexemtype
+
+ Parses the given document and returns a series of records,
+ one for each token produced by parsing.
+ Each token includes a lexid giving its type
+ and a lexem which gives its content.
+
+
+Dictionaries
+
+Dictionaries take textual tokens as input,
+usually those produced by a parser,
+and return lexemes which are usually some reduced form of the token.
+Among the dictionaries which come installed with tsearch2 are:
+
+
+simple simply folds uppercase letters to lowercase
+ before returning the word.
+en_stem runs an English Snowball stemmer on each word
+ that attempts to reduce the various forms of a verb or noun
+ to a single recognizable form.
+ru_stem runs a Russian Snowball stemmer on each word.
+
+
+Each dictionary is defined by an entry in the pg_ts_dict table:
+
+CREATE TABLE pg_ts_dict (
+   dict_id     int not null primary key,
+   dict_name   text not null,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lemmatize  oid not null,
+   dict_comment    text
+);
+
+The dict_id and dict_name
+serve as unique identifiers for the dictionary.
+The meaning of the dict_initoption varies among dictionaries,
+but for the built-in Snowball dictionaries
+it specifies a file from which stop words should be read.
+The dict_comment is a human-readable description of the dictionary.
+The other fields are internal function identifiers
+useful only to developers trying to implement their own dictionaries.
+
+The argument named dictionary
+in each of the following functions
+should be either an integer dict_id or a textual dict_name
+identifying which dictionary should be used for the operation;
+if omitted then the current dictionary is used.
+
+
+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID
+
+ Selects a current dictionary for use by functions
+ that do not select a dictionary explicitly.
+
+ CREATE FUNCTION lexize(

+ [ dictionary, ] word text)
+ RETURNS TEXT[]
+
+ Reduces a single word to a lexeme.
+ Note that lexemes are arrays of zero or more strings,
+ since in some languages there might be several base words
+ from which an inflected form could arise.
+
+
+Ranking
+
+Ranking attempts to measure how relevant documents are to particular queries
+by inspecting the number of times each search word appears in the document,
+and whether different search terms occur near each other.
+Note that this information is only available in unstripped vectors —
+ranking functions will only return a useful result
+for a tsvector which still has position information!
+
+Both of these ranking functions
+take an integer normalization option
+that specifies whether a document's length should impact its rank.
+This is often desirable,
+since a hundred-word document with five instances of a search word
+is probably more relevant than a thousand-word document with five instances.
+The option can have the values:
+
+
+0 (the default) ignores document length.
+1 divides the rank by the logarithm of the length.
+2 divides the rank by the length itself.
+
+
+The two ranking functions currently available are:
+
+
+
+ CREATE FUNCTION rank(

+  [ weights float4[], ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS,
+ and offers the ability to weight word instances more heavily
+ depending on how you have classified them.
+ The weights specify how heavily to weight each category of word:
+ 
+ {D-weight, A-weight, B-weight, C-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(

+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or less.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+


diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b


--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | [email protected]
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | [email protected]
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 |  
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 |  
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 |  
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 |  
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+


diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496


--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global # NOTE(review): presumably the tree-wide build settings -- confirm
+
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+PG_CPPFLAGS =
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+
+include $(top_srcdir)/contrib/contrib-global.mk # template file: gendict/config.sh substitutes the CFG_ placeholders above and rewrites the whole PG_CPPFLAGS line


diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7


--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility helps people create dictionaries for the contrib/tsearch2
+module. In particular, it has built-in support for Snowball stemmers.
+
+Programming API to tsearch2 dictionaries is described in tsearch v2 
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument of the -p option must be *the same* as the name of
+      the stemming function in stem.c (without the _stem suffix).
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample Portuguese words with their stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+        Note that the dictionary and its corresponding entry in the tsearch
+   configuration are now installed, and you may modify them using
+   plain SQL commands, for example to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercases the input word and removes it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please, check Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for additional information about "Gendict tutorial" and dictionaries.
\ No newline at end of file


diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542


--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRL/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+dictname=  # -n: dictionary name
+stemmode=no  # -s: yes when a Snowball wrapper is requested
+verbose=no  # -v: yes to print progress messages
+cfile=  # -c: extra source files
+hfile=  # -h: extra header files
+dir=   # -d: output directory name
+hasinit=no  # -i: yes when the dictionary has an init method
+comment=  # -C: dictionary comment
+prefix=  # -p: prefix of the Snowball functions
+
+while getopts n:c:C:h:d:p:vis opt  # letters followed by ':' take an argument, delivered in OPTARG
+do
+   case "$opt" in
+       v) verbose=yes;;
+       s) stemmode=yes;;
+       i) hasinit=yes;;
+       n) dictname="$OPTARG";;
+       c) cfile="$OPTARG";;
+       h) hfile="$OPTARG";;
+       d) dir="$OPTARG";;
+       C) comment="$OPTARG";;
+       p) prefix="$OPTARG";;
+       \?) usage;;  # getopts reports an invalid option as '?': show the synopsis and exit
+   esac
+done
+
+[ ${#dictname} -eq 0 ] && usage
+
+dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'`
+
+if [ $stemmode = "yes" ] ; then 
+   [ ${#prefix} -eq 0 ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi 
+
+[ ${#dir}   -eq 0 ] && dir="dict_$dictname"
+
+if [ ${#comment} -eq 0 ]; then
+   comment=null
+else
+   comment="'$comment'"
+fi
+
+ofile=
+for f in $cfile
+do
+   f=` echo $f | sed 's#c$#o#'`
+   ofile="$ofile $f"
+done
+
+if [ $stemmode = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+if [ $verbose = "yes" ]; then
+   echo Dictname: "'"$dictname"'"
+   echo Snowball stemmer: $stemmode
+   echo Has init method: $hasinit
+   [ $stemmode = "yes" ] && echo Function prefix: $prefix 
+   echo Source files: $cfile
+   echo Header files: $hfile
+   echo Object files: $ofile
+   echo Comment: $comment
+   echo Directory: ../../$dir
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build directory...  '
+if [ ! -d ../../$dir ]; then
+   if ! mkdir ../../$dir ; then 
+       echo "Can't create directory ../../$dir"
+       exit 1
+   fi 
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+else
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+fi
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+   [ $verbose = "yes" ] && echo -n 'Copy source and header files...  '
+   if [ ${#cfile} -ne 0 ] ; then
+       if ! cp $cfile ../../$dir ; then 
+           echo "Cant cp all or one of files: $cfile"
+           exit 1
+       fi
+   fi
+   if [ ${#hfile} -ne 0 ] ; then 
+       if ! cp $hfile ../../$dir ; then 
+               echo "Cant cp all or one of files: $hfile"
+           exit 1
+       fi
+   fi
+   [ $verbose = "yes" ] && echo ok
+fi
+
+
+# Generate subinclude.h: one #include line per header file given with -h.
+[ $verbose = "yes" ] && echo -n 'Build sub-include header...  '
+# Create/truncate the file with ':' instead of 'echo -n': an echo that does
+# not treat -n as an option would write the text "-n" into the header.
+: > ../../$dir/subinclude.h
+for i in $hfile
+do
+   echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+if  [ $stemmode = "yes" ] ; then 
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   [ $verbose = "yes" ] && echo -n 'Build dictinonary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else 
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi 
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n "Build README.$dictname...  "
+if  [ $stemmode = "yes" ] ; then
+   echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname
+else
+   echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname
+fi
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+


diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+typedef struct {  /* per-dictionary state built by dinit_CFG_MODNAME */
+   struct SN_env *z;  /* stemmer environment obtained from CFG_PREFIX_create_env() */
+   StopList    stoplist;  /* stop words; filled by readstoplist() when the init argument is given */
+   int (*stem)(struct SN_env * z);  /* stemming function, set to CFG_PREFIX_stem */
+} DictSnowball;
+
+
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * Init method of the generated Snowball dictionary.
+ *
+ * Allocates a zeroed DictSnowball, installs lowerstr as the stop list's
+ * word operation, loads and sorts the stop list via readstoplist() and
+ * sortstoplist() when argument 0 is supplied, creates the stemmer
+ * environment and returns the structure as a pointer datum.
+ *
+ * Raises ERROR ("No memory") when the structure or the stemmer environment
+ * cannot be allocated.
+ */
+Datum 
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+       
+   /* the stop list is optional: skip it when no argument was passed */
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       freestoplist(&(d->stoplist));
+       /* d came from malloc(), so release it here: elog(ERROR) does not return */
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+


diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+HASINIT /* State of the example dictionary: only its stop-word list. */
+HASINIT typedef struct {
+HASINIT    StopList    stoplist;
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT /*
+HASINIT  * dinit_CFG_MODNAME - allocate and fill the dictionary state.
+HASINIT  *
+HASINIT  * Argument 0, when not NULL, is a text value passed to readstoplist();
+HASINIT  * the loaded list is then sorted.  Returns the malloc'ed DictExample.
+HASINIT  * Raises ERROR when the allocation fails.
+HASINIT  */
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT 
+HASINIT    if ( !d )
+HASINIT        elog(ERROR, "No memory");
+HASINIT    memset(d,0,sizeof(DictExample));
+HASINIT 
+HASINIT    /* words are passed through lowerstr by the stop-list code */
+HASINIT    d->stoplist.wordop=lowerstr;
+HASINIT    
+HASINIT    /* Your INIT code */
+HASINIT    
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+HASINIT        text       *in = PG_GETARG_TEXT_P(0);
+HASINIT        readstoplist(in, &(d->stoplist));
+HASINIT        sortstoplist(&(d->stoplist));
+HASINIT        PG_FREE_IF_COPY(in, 0);
+HASINIT    }
+HASINIT 
+HASINIT    PG_RETURN_POINTER(d);
+HASINIT }
+
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+/*
+ * dlexize_CFG_MODNAME - turn one input word into a list of lexemes.
+ *
+ * Argument 1 points to the word's characters and argument 2 is its length;
+ * argument 0 is the dictionary state and is only read on the HASINIT lines.
+ *
+ * Returns a palloc'ed, NULL-terminated array of strings.  It holds a copy
+ * of the word, or (HASINIT lines only) nothing when the word is empty or
+ * is found in the stop list.
+ */
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+   char    **res=palloc(sizeof(char*)*2);
+
+   /* Your INIT dictionary code */
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0]=NULL;
+HASINIT    } else 
+       res[0]=txt;
+   res[1]=NULL;
+
+   PG_RETURN_POINTER(res);
+}


diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842


--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+-- Template of the SQL script that registers a generated dictionary.
+-- NOTE(review): the HASINIT / NOINIT / ISSNOWBALL / NOSNOWBALL line prefixes
+-- and the CFG_* names are presumably rewritten by the gendict script; confirm
+-- there before editing them.
+SET search_path = public;
+BEGIN;
+
+-- init function of the dictionary
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+-- lexize function of the dictionary
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+-- Row values, in order: dictionary name, init function oid (or null),
+-- a null, lexize function oid, comment.
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;


diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/*
+ * GETENTRY: key of the pos-th GISTENTRY stored in the data area of the
+ * varlena vec, viewed as a GISTTYPE pointer.
+ */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/* SUMBIT: number of set bits among bits 0..7 of val */
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/*
+ * gtsvector_in - input function of the index key type.
+ * Not supported: always reports ERROR "Not implemented".
+ */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * gtsvector_out - output function of the index key type.
+ * Not supported: always reports ERROR "Not implemented".
+ */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * qsort() comparator: orders int4 values ascending.
+ */
+static int
+compareint(const void *a, const void *b)
+{
+   int4        lhs = *((int4 *) a);
+   int4        rhs = *((int4 *) b);
+
+   if (lhs > rhs)
+       return 1;
+   if (lhs < rhs)
+       return -1;
+   return 0;
+}
+
+/*
+ * uniqueint - sort the l-element array a and remove duplicates in place.
+ *
+ * Returns the number of distinct values, which end up at the front of a.
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+   int4       *ptr,
+              *res;
+
+   /*
+    * Nothing to sort or squeeze.  An empty array has to report 0 here: the
+    * "res + 1 - a" below would otherwise count one element that is not
+    * there.
+    */
+   if (l <= 1)
+       return l;
+
+   ptr = res = a;
+
+   qsort((void *) a, l, sizeof(int4), compareint);
+
+   /* res trails ptr and marks the last distinct value kept so far */
+   while (ptr - a < l)
+       if (*ptr != *res)
+           *(++res) = *ptr++;
+       else
+           ptr++;
+   return res + 1 - a;
+}
+
+/*
+ * makesign - build a bit signature from an array key: clear sign, then set
+ * the bit HASH() selects for every array element of a.
+ */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+   int4       *values = GETARR(a);
+   int4        nvalues = ARRNELEM(a);
+   int4        idx = 0;
+
+   MemSet((void *) sign, 0, sizeof(BITVEC));
+   while (idx < nvalues)
+   {
+       HASH(sign, values[idx]);
+       idx++;
+   }
+}
+
+/*
+ * gtsvector_compress - GiST compress method.
+ *
+ * For a leaf key (a tsvector) builds an ARRKEY holding the sorted, distinct
+ * crc32 values of its words; when that key is longer than
+ * TOAST_INDEX_TARGET it is replaced by a SIGNKEY bit signature.  For a
+ * non-leaf SIGNKEY whose signature bytes are all 0xff the key is replaced
+ * by the short SIGNKEY | ALLISTRUE form.  In every other case the entry is
+ * returned unchanged.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       /* one int4 slot per word of the tsvector */
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       /* sort the hashes and drop duplicates */
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* release the detoasted copy, if PG_DETOAST_DATUM made one */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /* any byte that is not 0xff: keep the entry as it is */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       /* every bit is set: store the header-only ALLISTRUE key instead */
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * gtsvector_decompress - GiST decompress method.
+ *
+ * Detoasts the key.  When detoasting produced a new pointer, a fresh
+ * GISTENTRY wrapping it is returned; otherwise the incoming entry is
+ * returned untouched.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *in = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *detoasted = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(in->key));
+   GISTENTRY  *out;
+
+   /* same pointer back: nothing was detoasted */
+   if (detoasted == (GISTTYPE *) DatumGetPointer(in->key))
+       PG_RETURN_POINTER(in);
+
+   out = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+   gistentryinit(*out, PointerGetDatum(detoasted),
+                 in->rel, in->page,
+                 in->offset, detoasted->len, FALSE);
+
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Half-open range [arrb, arre) of int4 values searched by
+ * checkcondition_arr().
+ */
+typedef struct
+{
+   int4       *arrb;           /* first element */
+   int4       *arre;           /* one past the last element */
+}  CHKVAL;
+
+/*
+ * Binary search for val->val in the int4 range described by the CHKVAL
+ * passed as checkval.  Returns true when the value is present.
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+   CHKVAL     *range = (CHKVAL *) checkval;
+   int4       *lo = range->arrb;
+   int4       *hi = range->arre;
+
+   /* the candidate, if present, always lies in [lo, hi) */
+   while (lo < hi)
+   {
+       int4       *mid = lo + (hi - lo) / 2;
+
+       if (*mid < val->val)
+           lo = mid + 1;
+       else if (*mid == val->val)
+           return (true);
+       else
+           hi = mid;
+   }
+
+   return (false);
+}
+
+/*
+ * Signature variant of the check: checkval is a bit signature and the
+ * result is the bit that HASHVAL() selects for val->val.
+ */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+   return GETBIT(checkval, HASHVAL(val->val));
+}
+
+/*
+ * gtsvector_consistent - GiST consistent method.
+ *
+ * Argument 0 is the GISTENTRY whose key is tested, argument 1 the query.
+ * An empty query never matches and an ALLISTRUE signature always does.
+ * Otherwise the query is run through TS_execute() against either the bit
+ * signature or the sorted array stored in the key.
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(entry->key);
+   QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+   CHKVAL      range;
+
+   if (!query->size)
+       PG_RETURN_BOOL(false);
+
+   if (!ISSIGNKEY(key))
+   {                           /* only leaf pages */
+       range.arrb = GETARR(key);
+       range.arre = range.arrb + ARRNELEM(key);
+       PG_RETURN_BOOL(TS_execute(
+                              GETQUERY(query),
+                              (void *) &range, true,
+                              checkcondition_arr
+                              ));
+   }
+
+   if (ISALLTRUE(key))
+       PG_RETURN_BOOL(true);
+
+   PG_RETURN_BOOL(TS_execute(
+                          GETQUERY(query),
+                          (void *) GETSIGN(key), false,
+                          checkcondition_bit
+                          ));
+}
+
+/*
+ * unionkey - merge the key add into the signature sbase.
+ *
+ * Returns 1 when add is an ALLISTRUE signature (sbase is left untouched),
+ * 0 after OR-ing a signature or hashing an array key into sbase.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+   int4        i;
+   BITVECP     sadd;
+
+   if (!ISSIGNKEY(add))
+   {
+       int4       *values = GETARR(add);
+
+       for (i = 0; i < ARRNELEM(add); i++)
+           HASH(sbase, values[i]);
+       return 0;
+   }
+
+   if (ISALLTRUE(add))
+       return 1;
+
+   sadd = GETSIGN(add);
+   LOOPBYTE(
+            sbase[i] |= sadd[i];
+   );
+   return 0;
+}
+
+
+/*
+ * gtsvector_union - GiST union method.
+ *
+ * Argument 0 is a bytea holding an array of GISTENTRY, argument 1 receives
+ * the byte size of the result.  The result is always a SIGNKEY: the OR of
+ * all member signatures, or the header-only ALLISTRUE form as soon as one
+ * member is ALLISTRUE.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   int        *size = (int *) PG_GETARG_POINTER(1);
+   BITVEC      base;
+   int4        len = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+   int4        i;
+   int4        flag = 0;
+   GISTTYPE   *result;
+
+   MemSet((void *) base, 0, sizeof(BITVEC));
+   for (i = 0; i < len; i++)
+   {
+       /* unionkey() returns non-zero for an ALLISTRUE member */
+       if (unionkey(base, GETENTRY(entryvec, i)))
+       {
+           flag = ALLISTRUE;
+           break;
+       }
+   }
+
+   flag |= SIGNKEY;
+   len = CALCGTSIZE(flag, 0);
+   result = (GISTTYPE *) palloc(len);
+   *size = result->len = len;
+   result->flag = flag;
+   /* an ALLISTRUE key has no signature bytes to fill */
+   if (!ISALLTRUE(result))
+       memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * gtsvector_same - GiST same method.
+ *
+ * Arguments 0 and 1 are the two keys, argument 2 points to the bool that
+ * receives the answer; that pointer is also the return value.  Signatures
+ * are equal when both are ALLISTRUE or all their bytes match; array keys
+ * are equal when they have the same length and the same elements.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+   GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+   GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+   bool       *result = (bool *) PG_GETARG_POINTER(2);
+
+   if (ISSIGNKEY(a))
+   {                           /* then b also ISSIGNKEY */
+       if (ISALLTRUE(a) && ISALLTRUE(b))
+           *result = true;
+       else if (ISALLTRUE(a))
+           *result = false;
+       else if (ISALLTRUE(b))
+           *result = false;
+       else
+       {
+           int4        i;
+           BITVECP     sa = GETSIGN(a),
+                       sb = GETSIGN(b);
+
+           /* byte-wise comparison, stopping at the first difference */
+           *result = true;
+           LOOPBYTE(
+                    if (sa[i] != sb[i])
+                    {
+               *result = false;
+               break;
+           }
+           );
+       }
+   }
+   else
+   {                           /* a and b ISARRKEY */
+       int4        lena = ARRNELEM(a),
+                   lenb = ARRNELEM(b);
+
+       if (lena != lenb)
+           *result = false;
+       else
+       {
+           int4       *ptra = GETARR(a),
+                      *ptrb = GETARR(b);
+           int4        i;
+
+           *result = true;
+           for (i = 0; i < lena; i++)
+               if (ptra[i] != ptrb[i])
+               {
+                   *result = false;
+                   break;
+               }
+       }
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * sizebitvec - number of bits set in the signature sign.
+ */
+static int4
+sizebitvec(BITVECP sign)
+{
+   int4        i,
+               nset = 0;
+   char       *cur = (char *) sign;
+
+   /* add up the set bits of each byte, one byte per iteration */
+   LOOPBYTE(
+       nset += SUMBIT(*cur);
+       cur++;
+   );
+   return nset;
+}
+
+/*
+ * hemdistsign - Hamming distance between two signatures: the number of
+ * bit positions in which a and b differ.
+ */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+   int     i;
+   int     ndiff = 0;
+
+   LOOPBIT(
+       ndiff += ( GETBIT(a,i) != GETBIT(b,i) ) ? 1 : 0;
+   );
+   return ndiff;
+}
+
+/*
+ * hemdist - Hamming distance between two signature keys.  An ALLISTRUE
+ * key counts as a signature with every bit set.
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+   if ( ISALLTRUE(a) && ISALLTRUE(b) )
+       return 0;
+
+   /* exactly one side is all-true: count the other side's clear bits */
+   if ( ISALLTRUE(a) )
+       return SIGLENBIT-sizebitvec(GETSIGN(b));
+   if ( ISALLTRUE(b) )
+       return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+   return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * gtsvector_penalty - GiST penalty method.
+ *
+ * Arguments 0 and 1 are the existing and the new GISTENTRY, argument 2
+ * points to the float that receives the penalty; that pointer is also the
+ * return value.  The penalty is the Hamming distance between the existing
+ * signature and the new key (an array key is first turned into a
+ * signature).
+ *
+ * NOTE(review): only the "new key is an array, existing key is ALLISTRUE"
+ * branch divides by SIGLENBIT+1; the other branches store the raw
+ * distance.  Confirm that the different scale is intended.
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+   GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+   float      *penalty = (float *) PG_GETARG_POINTER(2);
+   GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+   GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+   BITVECP     orig = GETSIGN(origval);
+
+   *penalty = 0.0;
+
+   if (ISARRKEY(newval)) {
+       BITVEC sign;
+       makesign(sign, newval);
+
+       if ( ISALLTRUE(origval) ) 
+           *penalty=((float)(SIGLENBIT-sizebitvec(sign)))/(float)(SIGLENBIT+1);
+       else 
+           *penalty=hemdistsign(sign,orig);
+   } else {
+       *penalty=hemdist(origval,newval);
+   }
+   PG_RETURN_POINTER(penalty);
+}
+
+/*
+ * Signature of one entry as cached by fillcache() for picksplit.
+ */
+typedef struct
+{
+   bool        allistrue;      /* key was ALLISTRUE; sign is not filled then */
+   BITVEC      sign;           /* bare signature bytes, no GISTTYPE header */
+}  CACHESIGN;
+
+/*
+ * fillcache - store the signature of key in item: array keys are hashed
+ * into a signature, ALLISTRUE keys only set the flag, other signature keys
+ * are copied.
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+   if (ISARRKEY(key))
+   {
+       item->allistrue = false;
+       makesign(item->sign, key);
+       return;
+   }
+
+   if (ISALLTRUE(key))
+   {
+       item->allistrue = true;
+       return;
+   }
+
+   item->allistrue = false;
+   memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+}
+
+/* WISH_F(a,b,c): -(a-b)^3 * c as a double */
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+/* One entry of the split, sorted by cost with comparecost(). */
+typedef struct
+{
+   OffsetNumber pos;           /* offset of the entry */
+   int4        cost;           /* sort key */
+} SPLITCOST;
+
+/*
+ * qsort() comparator: orders SPLITCOST items by ascending cost.
+ */
+static int
+comparecost(const void *a, const void *b)
+{
+   int4        lhs = ((SPLITCOST *) a)->cost;
+   int4        rhs = ((SPLITCOST *) b)->cost;
+
+   if (lhs < rhs)
+       return -1;
+   return (lhs > rhs) ? 1 : 0;
+}
+
+
+/*
+ * hemdistcache - Hamming distance between two cached signatures.  An
+ * all-true entry counts as a signature with every bit set.
+ */
+static int
+hemdistcache(CACHESIGN   *a, CACHESIGN   *b) {
+   if ( a->allistrue && b->allistrue )
+       return 0;
+
+   /* exactly one side is all-true: count the other side's clear bits */
+   if ( a->allistrue )
+       return SIGLENBIT-sizebitvec(b->sign);
+   if ( b->allistrue )
+       return SIGLENBIT-sizebitvec(a->sign);
+
+   return hemdistsign( a->sign, b->sign );
+}
+
+/*
+ * gtsvector_picksplit - GiST picksplit method.
+ *
+ * Argument 0 is a bytea holding an array of GISTENTRY, argument 1 the
+ * GIST_SPLITVEC to fill; v is also the return value.
+ *
+ * The two entries whose signatures are farthest apart (Hamming distance)
+ * become the seeds of the left and right side.  The entries are then
+ * visited in order of increasing |distance to seed 1 - distance to seed 2|
+ * and each one joins the side whose running union it is closer to;
+ * WISH_F() shifts that decision towards the side holding fewer entries.
+ * On return spl_left/spl_right, their counts and the two union keys are
+ * set.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+   OffsetNumber k,
+               j;
+   GISTTYPE   *datum_l,
+              *datum_r;
+   BITVECP     union_l,
+               union_r;
+   int4        size_alpha,
+               size_beta;
+   int4        size_waste,
+               waste = -1;
+   int4        nbytes;
+   OffsetNumber seed_1 = 0,
+               seed_2 = 0;
+   OffsetNumber *left,
+              *right;
+   OffsetNumber maxoff;
+   BITVECP     ptr;
+   int         i;
+   CACHESIGN  *cache;
+   SPLITCOST  *costvector;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+   cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+   fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+   /* pick the pair with the largest distance; the cache fills on the first pass */
+   for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+       for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+           if (k == FirstOffsetNumber)
+               fillcache(&cache[j], GETENTRY(entryvec, j));
+
+           size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+           if (size_waste > waste) {
+               waste = size_waste;
+               seed_1 = k;
+               seed_2 = j;
+           }
+       }
+   }
+
+   left = v->spl_left;
+   v->spl_nleft = 0;
+   right = v->spl_right;
+   v->spl_nright = 0;
+
+   /* no pair was examined: fall back to the first two entries */
+   if (seed_1 == 0 || seed_2 == 0) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
+   /* form initial .. */
+   if (cache[seed_1].allistrue) {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_l->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_l->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+   }
+   if (cache[seed_2].allistrue) {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_r->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_r->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+   }
+
+   union_l=GETSIGN(datum_l);
+   union_r=GETSIGN(datum_r);
+   maxoff = OffsetNumberNext(maxoff);
+   fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+   /* sort before ... */
+   costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+   for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+       costvector[j - 1].pos = j;
+       size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+       size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+       costvector[j - 1].cost = abs(size_alpha - size_beta);
+   }
+   qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+   for (k = 0; k < maxoff; k++) {
+       j = costvector[k].pos;
+       if (j == seed_1) {
+           *left++ = j;
+           v->spl_nleft++;
+           continue;
+       } else if (j == seed_2) {
+           *right++ = j;
+           v->spl_nright++;
+           continue;
+       }
+
+       /*
+        * Distance of entry j to each running union.  cache[j].sign is a
+        * bare BITVEC without a GISTTYPE header, so it is passed as is:
+        * wrapping it in GETSIGN() would skip GTHDRSIZE bytes of the
+        * signature and read past its end.
+        */
+       if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+               size_alpha=0;
+           else
+               size_alpha = SIGLENBIT-sizebitvec(  
+                   ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign  
+               );
+       } else {
+           size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+       }
+
+       if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+               size_beta=0;
+           else
+               size_beta = SIGLENBIT-sizebitvec(  
+                   ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign  
+               );
+       } else {
+           size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+       }
+
+       if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+           if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+               /* an all-true member saturates the union's signature */
+               if (! ISALLTRUE(datum_l) )
+                   MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_l[i] |= ptr[i];
+               );
+           }
+           *left++ = j;
+           v->spl_nleft++;
+       } else {
+           if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+               /* an all-true member saturates the union's signature */
+               if (! ISALLTRUE(datum_r) )
+                   MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_r[i] |= ptr[i];
+               );
+           }
+           *right++ = j;
+           v->spl_nright++;
+       }
+   }
+
+   *right = *left = FirstOffsetNumber;
+   pfree(costvector);
+   pfree(cache);
+   v->spl_ldatum = PointerGetDatum(datum_l);
+   v->spl_rdatum = PointerGetDatum(datum_r);
+
+   PG_RETURN_POINTER(v);
+}


diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+#define BITBYTE 8              /* bits per signature byte */
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+/* signature length in bytes and in bits */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+/* a signature (SIGLEN bytes) and a pointer to its first byte */
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+#define LOOPBYTE(a) \
+       for(i=0;i
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i
+                               a;\
+               }
+
+/* byte of signature x that holds bit number i */
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+/* bit i (0..7) of the single byte value x */
+#define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
+/* clear, set and read bit number i of signature x */
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+/* NOTE(review): function-like abs/min macros evaluate their arguments twice */
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+/* bit number a value maps to, and the macro that sets that bit in sign */
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ */
+typedef struct
+{
+   int4        len;            /* total size in bytes, header included (see CALCGTSIZE) */
+   int4        flag;           /* combination of ARRKEY / SIGNKEY / ALLISTRUE */
+   char        data[1];        /* start of the payload: int4 array or signature */
+}  GISTTYPE;
+
+/* values of GISTTYPE.flag */
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+#define ISARRKEY(x) ( ((GISTTYPE*)x)->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)x)->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)x)->flag & ALLISTRUE )
+
+/* size of the len and flag fields that precede the payload */
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+/*
+ * Key size in bytes: header plus len int4 values for an array key, header
+ * only for an ALLISTRUE key, header plus SIGLEN bytes for other signatures.
+ */
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+/*
+ * Payload accessors.  They skip GTHDRSIZE bytes, so x must point to a
+ * GISTTYPE and never to a bare BITVEC.
+ */
+#define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)x+GTHDRSIZE ) )
+/* number of int4 elements stored in array key x */
+#define ARRNELEM(x) ( ( ((GISTTYPE*)x)->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "spell.h"
+
+#define MAXNORMLEN 56
+
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/* qsort() comparator: orders SPELL entries by their word, using strcmp() */
+static int cmpspell(const void *s1,const void *s2){
+   const SPELL *left = (const SPELL*)s1;
+   const SPELL *right = (const SPELL*)s2;
+
+   return(strcmp(left->word,right->word));
+}
+
+/* Lower-case str in place, one byte at a time, with tolower(). */
+static void 
+strlower( char * str ) {
+   unsigned char *cur;
+
+   for ( cur = (unsigned char *)str; *cur; cur++ )
+       *cur = tolower( *cur );
+}
+
+/*
+ * Backward string compare for suffix tree operations: the strings are
+ * compared from their last character towards the first.  When one string
+ * is a tail of the other, the shorter one sorts first.
+ */
+static int 
+strbcmp(const char *s1, const char *s2) { 
+   int i1, i2;
+
+   for (i1 = strlen(s1)-1, i2 = strlen(s2)-1; i1 >= 0 && i2 >= 0; i1--, i2--) {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+   if (i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+/*
+ * Like strbcmp(), but compares at most count characters from the end of
+ * both strings; returns 0 once count characters have matched.
+ */
+static int 
+strbncmp(const char *s1, const char *s2, size_t count) { 
+   int i1 = strlen(s1) - 1;
+   int i2 = strlen(s2) - 1;
+   int left = count;
+
+   for (; i1 >= 0 && i2 >= 0 && left > 0; i1--, i2--, left--) {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+   if (left == 0 || i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+
+/*
+ * qsort() comparator for AFFIX entries: first by type, then by repl,
+ * compared forwards for type 'p' and backwards (strbcmp) otherwise.
+ */
+static int 
+cmpaffix(const void *s1,const void *s2){
+   const AFFIX *left = (const AFFIX*)s1;
+   const AFFIX *right = (const AFFIX*)s2;
+
+   if (left->type != right->type)
+       return (left->type < right->type) ? -1 : 1;
+   if (left->type != 'p')
+       return(strbcmp(left->repl,right->repl));
+   return(strcmp(left->repl,right->repl));
+}
+
+/*
+ * AddSpell - append one word with its affix flags to Conf->Spell.
+ *
+ * The array grows in steps of 1024*20 entries.  The word is duplicated with
+ * strdup(); at most 9 characters of flag are kept.  Returns 0; raises ERROR
+ * when memory runs out.
+ */
+int 
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell"); 
+   }
+   Conf->Spell[Conf->nspell].word=strdup(word);
+   if ( !Conf->Spell[Conf->nspell].word ) 
+       elog(ERROR,"No memory for AddSpell");
+   strncpy(Conf->Spell[Conf->nspell].flag,flag,10);
+   /*
+    * strncpy() writes no terminator when flag has 10 or more characters,
+    * and the entry comes from malloc(), so terminate explicitly: FindWord()
+    * runs strchr() over this field.
+    */
+   Conf->Spell[Conf->nspell].flag[9]='\0';
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * ImportDictionary - load a word list from filename into Conf.
+ *
+ * Each line is "word" or "word/flags".  The flags are the run of ASCII
+ * letters that follows the slash; the word is lower-cased and cut at the
+ * first CR or LF before being passed to AddSpell().
+ *
+ * Returns 1 when the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportDictionary(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];  
+   FILE *dict;
+
+   if(!(dict=fopen(filename,"r")))return(1);
+   while(fgets(str,sizeof(str),dict)){
+       unsigned char *s;
+       const unsigned char *flag;
+
+           flag = NULL;
+       if((s=strchr(str,'/'))){
+           /* split at the slash; the flags end at the first non-letter */
+           *s=0;
+           s++;flag=s;
+           while(*s){
+               if (((*s>='A')&&(*s<='Z'))||((*s>='a')&&(*s<='z')))
+                   s++;
+               else {
+                   *s=0;
+                   break;
+               }
+           }
+       }else{
+           flag="";
+       }
+       /* str now ends at the slash, if any, so the flags keep their case */
+       strlower(str);
+       /* cut the word at the first CR or LF */
+       s=str;
+       while(*s){
+           if(*s=='\r')*s=0;
+           if(*s=='\n')*s=0;
+           s++;
+       }
+       AddSpell(Conf,str,flag);
+   }
+   fclose(dict);
+   return(0);
+}
+
+
+/*
+ * FindWord - look word up in the sorted Conf->Spell array.
+ *
+ * The search is limited to the index range SpellTree records for the
+ * word's first byte.  An entry matches when its word equals word and
+ * either affixflag is 0 or the entry's flag string contains affixflag.
+ * Returns the matching entry or NULL.
+ *
+ * Each round probes the middle, the low and the high end of the range and
+ * then shrinks the range from both ends.
+ * NOTE(review): presumably this is done because the same word can occur
+ * more than once with different flags - confirm.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int l,c,r,resc,resl,resr, i;
+
+   i = (int)(*word) & 255;
+   l = Conf->SpellTree.Left[i];
+   r = Conf->SpellTree.Right[i];
+   /* no word starts with this byte */
+   if (l == -1) return (NULL);
+   while(l<=r){
+       c = (l + r) >> 1;
+       resc = strcmp(Conf->Spell[c].word, word);
+       if( (resc == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[c]);
+       }
+       resl = strcmp(Conf->Spell[l].word, word);
+       if( (resl == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[l]);
+       }
+       resr = strcmp(Conf->Spell[r].word, word);
+       if( (resr == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[r]);
+       }
+       if(resc < 0){
+           l = c + 1;
+           r--;
+       } else if(resc > 0){
+           r = c - 1;
+           l++;
+       } else {
+           /* same word in the middle but wrong flag: step in from both ends */
+           l++;
+           r--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * AddAffix - append one affix rule to Conf->Affix.
+ *
+ * The array grows in steps of 16 entries.  The mask is stored as "mask$"
+ * for type 's' and as "^mask" otherwise, and the entry is marked as not
+ * yet compiled (compile = 1).  Returns 0; raises ERROR when memory runs
+ * out.
+ *
+ * NOTE(review): mask, find and repl are copied with sprintf()/strcpy()
+ * without a length check; the sizes of the destination fields are declared
+ * outside this file section - confirm the callers cannot exceed them.
+ */
+int 
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   if(Conf->naffixes>=Conf->maffixes){
+       if(Conf->maffixes){
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }else{
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL ) 
+           elog(ERROR,"No memory for AddAffix");
+   }
+   if (type=='s') {
+       sprintf(Conf->Affix[Conf->naffixes].mask,"%s$",mask);
+   } else {
+       sprintf(Conf->Affix[Conf->naffixes].mask,"^%s",mask);
+   }
+   Conf->Affix[Conf->naffixes].compile = 1;
+   Conf->Affix[Conf->naffixes].flag=flag;
+   Conf->Affix[Conf->naffixes].type=type;
+   
+   strcpy(Conf->Affix[Conf->naffixes].find,find);
+   strcpy(Conf->Affix[Conf->naffixes].repl,repl);
+   Conf->Affix[Conf->naffixes].replen=strlen(repl);
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy src to dist, leaving out every space, '-' and tab character.
+ * Returns dist.
+ */
+static char * 
+remove_spaces(char *dist,char *src){
+   char *out = dist;
+   char *in;
+
+   for(in=src; *in; in++){
+       if(*in==' '||*in=='-'||*in=='\t')
+           continue;
+       *out++=*in;
+   }
+   *out=0;
+   return(dist);
+}
+
+
+/*
+ * ImportAffixes - load affix rules from filename into Conf.
+ *
+ * Lines starting with "suffixes" or "prefixes" select the kind of the
+ * rules that follow, and a "flag " line sets the flag character for them.
+ * Every other line inside such a section is cut at '#', lower-cased and
+ * parsed as "mask > find , repl" (or "mask > repl"); blanks, tabs and '-'
+ * are removed from the three parts before they go to AddAffix().
+ *
+ * Returns 1 when the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           /*
+            * strchr() also matches the terminating NUL of its search
+            * string, so stop explicitly at the end of the line.
+            */
+           while(*s && strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* str is free for reuse as scratch space once it has been parsed */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               /* "mask > repl": the single field after '>' is the replacement */
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+       
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+       
+   }
+   fclose(affix);
+       
+   return(0);
+}
+
+/*
+ * SortDictionary - sort Conf->Spell by word and record, for every first
+ * byte, the index of the first (Left) and last (Right) entry starting
+ * with it.  Left is -1 for bytes no word starts with.
+ */
+void 
+SortDictionary(IspellDict * Conf){
+   int     prev = -1;
+   int     first;
+   size_t  n;
+
+   qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+   for(n = 0; n < 256 ; n++ )
+       Conf->SpellTree.Left[n] = -1;
+
+   for(n = 0; n < Conf->nspell; n++) {
+       first = (int)(*(Conf->Spell[n].word)) & 255;
+       if (first != prev) {
+           /* a new leading byte starts here */
+           Conf->SpellTree.Left[first] = n;
+           prev = first;
+       }
+       Conf->SpellTree.Right[first] = n;
+   }
+}
+
+/*
+ * SortAffixes - sort Conf->Affix with cmpaffix() and index it.
+ *
+ * PrefixTree is keyed by the first byte of repl for entries of type 'p';
+ * SuffixTree is keyed by the last byte of repl (0 when repl is empty) for
+ * all other entries.  Left/Right hold the first and last array index per
+ * key and stay -1 for keys that do not occur.
+ */
+void 
+SortAffixes(IspellDict * Conf) {
+  int   CurLetP = -1, CurLetS = -1, Let;
+  AFFIX *Affix; size_t i;
+  
+  if (Conf->naffixes > 1)
+    qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
+  for(i = 0; i < 256; i++) {
+      Conf->PrefixTree.Left[i] = Conf->PrefixTree.Right[i] = -1;
+      Conf->SuffixTree.Left[i] = Conf->SuffixTree.Right[i] = -1;
+  }
+
+  for(i = 0; i < Conf->naffixes; i++) {
+    Affix = &(((AFFIX*)Conf->Affix)[i]);
+    if(Affix->type == 'p') {
+      Let = (int)(*(Affix->repl)) & 255;
+      if (CurLetP != Let) {
+   Conf->PrefixTree.Left[Let] = i;
+   CurLetP = Let;
+      }
+      Conf->PrefixTree.Right[Let] = i;
+    } else {
+      /* suffixes are keyed by their last character */
+      Let = (Affix->replen) ? (int)(Affix->repl[Affix->replen-1]) & 255 : 0;
+      if (CurLetS != Let) {
+   Conf->SuffixTree.Left[Let] = i;
+   CurLetS = Let;
+      }
+      Conf->SuffixTree.Right[Let] = i;
+    }
+  }
+}
+
+/*
+ * Try to undo one suffix rule on 'word' (of length 'len').
+ *
+ * *res receives the result of strbncmp(word, Affix->repl, Affix->replen);
+ * anything other than 0 means the rule does not apply and NULL is
+ * returned.  Otherwise the last 'replen' bytes of the word are replaced
+ * by Affix->find, the candidate is matched against the rule's mask
+ * regex (compiled lazily on first use) and looked up with FindWord.
+ * Returns a pstrdup'ed copy of the candidate on success, NULL otherwise.
+ */
+static char * 
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) {
+   regmatch_t  subs[2]; /* workaround for apache&linux */
+   char        candidate[2*MAXNORMLEN] = "";
+   int         rc;
+
+   *res = strbncmp(word, Affix->repl, Affix->replen);
+   if (*res != 0)
+       return NULL;
+
+   /* swap the inflected ending for the base-form ending */
+   strcpy(candidate, word);
+   strcpy(candidate+len-Affix->replen, Affix->find);
+
+   if (Affix->compile) {
+       rc = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+       if (rc != 0) {
+           regfree(&(Affix->reg));
+           return(NULL);
+       }
+       Affix->compile = 0;
+   }
+
+   rc = regexec(&(Affix->reg),candidate,1,subs,0);
+   if (rc != 0)
+       return NULL;
+   if (!FindWord(Conf, candidate, Affix->flag))
+       return NULL;
+
+   return pstrdup(candidate);
+}
+
+/* NS sizes the regmatch_t scratch array; MAX_NORM is the capacity of the
+ * NULL-terminated 'forms' array filled by NormalizeWord/CheckPrefix. */
+#define NS 1
+#define MAX_NORM 512
+/*
+ * Try to undo one prefix rule on 'word'.
+ *
+ * Returns the non-zero strncmp() result when the word does not start with
+ * Affix->repl (the caller uses the sign to steer its search), otherwise 0.
+ * When the rule applies, the prefix is replaced by Affix->find and the
+ * candidate is matched against the rule's mask regex.  On a match:
+ *   - if FindWord accepts the candidate it is appended to the output;
+ *   - the suffix rule at Conf->SuffixTree.Left[pi] (only that one entry)
+ *     is additionally tried on the candidate via CheckSuffix.
+ * Results are appended through *cur, which always points at the NULL
+ * terminator of 'forms'; nothing is appended once MAX_NORM-1 entries exist.
+ * 'len' is not used by this function.
+ */
+static int 
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur ) {
+  regmatch_t subs[NS*2];
+  char newword[2*MAXNORMLEN] = "";
+  int err, ls, res, lres;
+  size_t newlen;
+  AFFIX *CAffix = Conf->Affix;
+  
+  res = strncmp(word, Affix->repl, Affix->replen);
+  if (res != 0) {
+    return res;
+  }
+  /* candidate = find + (word without its first replen bytes) */
+  strcpy(newword, Affix->find);
+  strcat(newword, word+Affix->replen);
+
+  /* compile the mask regex on first use; 'compile' is cleared once done */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return (0);
+    }
+    Affix->compile = 0;
+  }
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    SPELL * curspell;
+
+    if((curspell=FindWord(Conf, newword, Affix->flag))){
+      if ((*cur - forms) < (MAX_NORM-1)) {
+   **cur =  pstrdup(newword);
+   (*cur)++; **cur = NULL;
+      }
+    } 
+    /* also try stripping a suffix from the de-prefixed candidate */
+    newlen = strlen(newword);
+    ls = Conf->SuffixTree.Left[pi];
+      if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
+   **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
+   if (**cur) {
+     (*cur)++; **cur = NULL;
+   }
+      }
+  }
+  return 0;
+}
+
+
+/*
+ * Return the dictionary base forms of 'word'.
+ *
+ * 'word' is lower-cased in place.  The result is a palloc'ed,
+ * NULL-terminated array of pstrdup'ed strings holding at most MAX_NORM-1
+ * entries, or NULL when nothing was found, when the word is empty, or
+ * when it is longer than MAXNORMLEN.
+ *
+ * An empty word is rejected up front: the original code read word[-1]
+ * for it, and if that byte happened to be 0 the "ipi += pi" loop below
+ * would never terminate.  For a non-empty word the last byte is
+ * non-zero, so the loop runs exactly twice (ipi == 0, then ipi == pi).
+ */
+char ** 
+NormalizeWord(IspellDict * Conf,char *word){
+   size_t len;
+   char **forms;
+   char **cur;
+   AFFIX *Affix;
+   int ri, pi, ipi, lp, rp, cp, ls, rs;
+   int lres, rres, cres = 0;
+   SPELL *spell;
+
+   len=strlen(word);
+   if (len == 0 || len > MAXNORMLEN)
+       return(NULL);
+
+   strlower(word);
+
+   /* array of char *, so the element size is sizeof(char *) */
+   forms=(char **) palloc(MAX_NORM*sizeof(char *));
+   cur=forms;*cur=NULL;
+
+   ri = (int)(*word) & 255;                    /* first byte: prefix key */
+   pi = (int)(word[strlen(word)-1]) & 255;     /* last byte: suffix key */
+   Affix=(AFFIX*)Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if((spell = FindWord(Conf, word, 0))){
+       *cur=pstrdup(word);
+       cur++;*cur=NULL;
+   }
+
+   /* Find all other NORMAL forms of the 'word' */
+
+   for (ipi = 0; ipi <= pi; ipi += pi) {
+
+       /* check prefix: narrow [lp, rp] from both ends and the middle */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp) {
+           cp = (lp + rp) >> 1;
+           cres = 0;
+           if ((cur - forms) < (MAX_NORM-1)) {
+               cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+           }
+           if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+           }
+           if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+           }
+           if (cres < 0) {
+               rp = cp - 1;
+               lp++;
+           } else if (cres > 0) {
+               lp = cp + 1;
+               rp--;
+           } else {
+               lp++;
+               rp--;
+           }
+       }
+
+       /* check suffix: walk the whole [ls, rs] range from both ends */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       while (ls >= 0 && ls <= rs) {
+           if (  ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           ls++;
+           rs--;
+       } /* end while */
+
+   } /* for ipi */
+
+   if(cur==forms){
+       pfree(forms);
+       return(NULL);
+   }
+   return(forms);
+}
+
+/*
+ * Release everything owned by an IspellDict and zero the structure.
+ *
+ * Regexes are freed only for affixes whose 'compile' flag is 0, i.e.
+ * those that were actually compiled.  The word strings are released
+ * over the nspell dictionary entries; the original loop was bounded by
+ * naffixes, which leaked words or read past the end of Spell[] whenever
+ * the two counts differed.
+ */
+void 
+FreeIspell (IspellDict *Conf) {
+   int i;
+   AFFIX *Affix = (AFFIX *)Conf->Affix;
+
+   for (i = 0; i < Conf->naffixes; i++) {
+       if (Affix[i].compile == 0) {
+           regfree(&(Affix[i].reg));
+       }
+   }
+   for (i = 0; i < Conf->nspell; i++) {
+       free( Conf->Spell[i].word );
+   }
+   free(Conf->Affix);
+   free(Conf->Spell);
+   memset( (void*)Conf, 0, sizeof(IspellDict) );
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include 
+#include 
+
+/* One dictionary entry. */
+typedef struct spell_struct {
+        char * word;    /* the word; released with free() in FreeIspell */
+        char flag[10];  /* presumably the affix flags allowed for this word - TODO confirm */
+} SPELL;
+
+/* One affix rule from the affix file. */
+typedef struct aff_struct {   
+        char flag;      /* flag of the rule; passed to FindWord with the candidate */
+        char type;      /* 'p' for a prefix rule, 's' for a suffix rule */
+        char mask[33];  /* regex source the rebuilt candidate must match */
+        char find[16];  /* text put in place of 'repl' when rebuilding the base form */
+        char repl[16];  /* affix text compared against the inflected word */
+        regex_t reg;    /* compiled 'mask'; valid only once 'compile' is 0 */
+        size_t replen;  /* number of bytes of 'repl' that are compared */
+        char compile;   /* non-zero while 'reg' still has to be compiled */
+} AFFIX;
+
+/*
+ * Byte-indexed range table over a sorted array: for key byte b,
+ * Left[b]/Right[b] are the first/last array index recorded for b,
+ * or -1 when none was recorded.
+ */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+/* Complete in-memory ispell dictionary: affix rules, words and their indexes. */
+typedef struct {
+   int maffixes;   /* presumably allocated capacity of Affix[] - TODO confirm */
+   int naffixes;   /* number of entries used in Affix[] */
+   AFFIX * Affix;  /* affix rules, ordered by SortAffixes */
+
+   int nspell;     /* number of entries used in Spell[] */
+   int mspell;     /* presumably allocated capacity of Spell[] - TODO confirm */
+   SPELL   *Spell; /* dictionary words, ordered by SortDictionary */
+   Tree_struct SpellTree;  /* Spell[] ranges keyed by a word's first byte */
+   Tree_struct PrefixTree; /* Affix[] ranges for prefix rules, keyed by first byte of repl */
+   Tree_struct SuffixTree; /* Affix[] ranges for suffix rules, keyed by last byte of repl */
+
+} IspellDict;
+
+/*
+ * Public interface of the ispell module.
+ * NormalizeWord returns a NULL-terminated array of base forms, or NULL.
+ * SortDictionary and SortAffixes sort the loaded tables and build the
+ * byte-indexed lookup trees; FreeIspell releases the tables and zeroes
+ * the structure.
+ */
+char ** NormalizeWord(IspellDict * Conf,char *word);
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+/* States of the key=value parser in parse_cfgdict */
+#define CS_WAITKEY 0       /* skipping blanks before a key */
+#define CS_INKEY   1       /* inside a key */
+#define CS_WAITEQ  2       /* key finished, waiting for '=' */
+#define CS_WAITVALUE   3   /* '=' seen, waiting for the value to start */
+#define CS_INVALUE 4       /* inside a double-quoted value */
+#define CS_IN2VALUE    5   /* inside an unquoted value */
+#define CS_WAITDELIM   6   /* value finished, waiting for ',' */
+#define CS_INESC   7       /* character after a backslash; resumes CS_INVALUE */
+#define CS_IN2ESC  8       /* character after a backslash; resumes CS_IN2VALUE */
+
+/*
+ * Return a palloc'ed, NUL-terminated copy of the first 'len' bytes of
+ * 'ptr' with backslash escapes removed: "\x" becomes "x".
+ *
+ * A backslash that is the very last character has nothing to escape and
+ * is copied literally.  The original code skipped it, copied the
+ * terminating NUL, then advanced past the terminator and kept scanning
+ * memory beyond the buffer.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+   char *res=palloc(len+1), *cptr;
+
+   memcpy(res,ptr,len);
+   res[len]='\0';
+
+   /* unescape in place: cptr writes, ptr reads, cptr never passes ptr */
+   cptr = ptr = res;
+   while(*ptr) {
+       if ( *ptr == '\\' && *(ptr+1) != '\0' )
+           ptr++;
+       *cptr=*ptr; ptr++; cptr++;
+   }
+   *cptr='\0';
+
+   return res;
+}
+
+/*
+ * Parse a text value of the form
+ *     key = value , key = "quoted value" , ...
+ * into a palloc'ed array of Map entries stored in *m.  The array is
+ * zero-filled and sized by the number of commas in the input plus two,
+ * so at least one all-zero entry follows the parsed pairs.
+ *
+ * Keys consist of alphabetic characters only.  Values are either
+ * double-quoted or run up to the next blank or comma; a backslash
+ * escapes the following character in both forms (nstrdup removes the
+ * backslashes).  Syntax errors are reported with elog(ERROR).
+ *
+ * Fixes relative to the original:
+ *  - a backslash inside an unquoted value now enters CS_IN2ESC, so the
+ *    parser returns to the unquoted state afterwards (it used CS_INESC,
+ *    which resumed in the quoted state);
+ *  - the comma counter is an int rather than a char, which overflowed
+ *    for inputs with many commas and corrupted the allocation size;
+ *  - ctype arguments are cast to unsigned char, as a negative char is
+ *    not a valid argument;
+ *  - pointer differences are cast to int to match the %d conversions.
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+   Map *mptr;
+   char *ptr=VARDATA(in), *begin=NULL;
+   int num=0;
+   int state=CS_WAITKEY;
+
+   /* upper bound for the number of pairs: count the commas */
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if ( *ptr==',' ) num++;
+       ptr++;
+   }
+
+   *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+   memset(mptr, 0, sizeof(Map)*(num+2) );
+   ptr=VARDATA(in);
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if (state==CS_WAITKEY) {
+           if (isalpha((unsigned char) *ptr)) {
+               begin=ptr;
+               state=CS_INKEY;
+           } else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if (state==CS_INKEY) {
+           if ( isspace((unsigned char) *ptr) ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITEQ;
+           } else if ( *ptr=='=' ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITVALUE;
+           } else if ( !isalpha((unsigned char) *ptr) ) 
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITEQ ) {
+           if ( *ptr=='=' )
+               state=CS_WAITVALUE;
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITVALUE ) {
+           if ( *ptr=='"' ) {
+               begin=ptr+1;
+               state=CS_INVALUE;
+           } else if ( !isspace((unsigned char) *ptr) ) {
+               begin=ptr;
+               state=CS_IN2VALUE;
+           }
+       } else if ( state==CS_INVALUE ) {
+           if ( *ptr=='"' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_INESC;
+       } else if ( state==CS_IN2VALUE ) {
+           if ( isspace((unsigned char) *ptr) || *ptr==',' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_IN2ESC;    /* stay in the unquoted value after the escape */
+       } else if ( state==CS_WAITDELIM ) {
+           if ( *ptr==',' ) 
+               state=CS_WAITKEY; 
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state == CS_INESC ) {
+           /* escaped character is taken as-is */
+           state=CS_INVALUE;
+       } else if ( state == CS_IN2ESC ) {
+           state=CS_IN2VALUE;
+       } else 
+           elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int) (ptr-VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may run up to the end of the input */
+   if (state==CS_IN2VALUE) {
+       mptr->value = nstrdup(begin, ptr-begin);
+       mptr++;
+   } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) ) 
+       elog(ERROR,"Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery is parsed with the text parser
+ * and morphology is applied as well.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored in a flat form: in the array of nodes
+ * the right child is always the next item and the left child is at item+item->left
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+/* Declarations of the functions this file defines with the PG_FUNCTION_ARGS signature */
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+/* token types returned by gettoken_query; VAL and OPR are also stored as node types */
+#define END            0           /* end of the query string */
+#define ERR            1           /* syntax error */
+#define VAL            2           /* operand */
+#define OPR            3           /* operator: '!', '&' or '|' */
+#define OPEN       4           /* '(' */
+#define CLOSE      5           /* ')' */
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1          /* an operand, '!' or '(' is expected next */
+#define WAITOPERATOR   2       /* '&', '|', ')' or end of input is expected next */
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ *
+ * pushquery links new nodes at the head of the list, so the list is
+ * read back in the reverse of the order in which nodes were pushed.
+ */
+typedef struct NODE
+{
+   int2        weight;     /* weight bit mask of an operand (see get_weight) */
+   int2        type;       /* VAL, VALTRUE or OPR */
+   int4        val;        /* operator character, or crc32 of the operand text */
+   int2        distance;   /* offset of the operand text in the operand buffer */
+   int2        length;     /* length of the operand text */
+   struct NODE *next;      /* previously pushed node */
+}  NODE;
+
+/* Working state of the query parser (gettoken_query / makepol / queryin) */
+typedef struct
+{
+   char       *buf;        /* current position in the query string */
+   int4        state;      /* WAITOPERAND or WAITOPERATOR */
+   int4        count;      /* current nesting depth of parentheses */
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   int4        lenop;      /* allocated size of op */
+   int4        sumlen;     /* bytes used in op, terminators included */
+   char       *op;         /* buffer of NUL-terminated operand strings */
+   char       *curop;      /* next free byte in op */
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional ":ABCD" weight suffix starting at 'buf'.
+ * *weight is always reset to 0; when 'buf' starts with ':' every
+ * following letter a/b/c/d (either case) sets bit 3/2/1/0.
+ * Returns a pointer to the first character that was not consumed.
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+   char *p = buf;
+
+   *weight = 0;
+
+   if ( *p == ':' ) {
+       for (p++; *p != '\0'; p++) {
+           int c = tolower(*p);
+
+           if (c == 'a')
+               *weight |= 1<<3;
+           else if (c == 'b')
+               *weight |= 1<<2;
+           else if (c == 'c')
+               *weight |= 1<<1;
+           else if (c == 'd')
+               *weight |= 1;
+           else
+               break;  /* not a weight letter: stop here */
+       }
+   }
+
+   return p;
+}
+
+/*
+ * get token from query string
+ *
+ * Returns the token type (OPR, OPEN, CLOSE, VAL, ERR or END) and advances
+ * state->buf past the token.  For OPR the operator character is stored in
+ * *val.  For VAL, *strval / *lenval describe the operand text held in
+ * state->valstate and *weight receives its weight mask.  Blanks between
+ * tokens are skipped.  state->count tracks the parenthesis depth.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               if (*(state->buf) == '!')
+               {
+                   /* unary NOT: an operand is still expected afterwards */
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* hand the operand over to the tsvector value parser */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       /* continue after the operand and its optional :weight */
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   /* more ')' than '(' is an error */
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* unclosed '(' at the end of input is an error */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       /* reached only for a blank: skip it */
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Prepend a new node to the parser's node list (state->str) and bump
+ * the node counter.  Because nodes are linked at the head, the list
+ * holds the polish notation in reverse order of insertion.
+ * Raises an error when 'distance' or 'lenval' exceed the storable range.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+
+   node->type = type;
+   node->val = val;
+   node->weight = weight;
+   node->distance = distance;
+   node->length = lenval;
+
+   /* link at the head of the list */
+   node->next = state->str;
+   state->str = node;
+   state->num += 1;
+}
+
+/*
+ * Operand callback used for plain tsquery parsing: the operand text is
+ * stored unchanged.  A node carrying the crc32 of the text and its
+ * offset in the operand buffer is pushed, then the text plus a NUL
+ * terminator is appended to that buffer, which is doubled in size as
+ * often as needed.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   int4        used;
+
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             state->curop - state->op, lenval, weight);
+
+   /* make room for the text and its terminator */
+   for (;;)
+   {
+       used = state->curop - state->op;
+       if (used + lenval + 1 < state->lenop)
+           break;
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+       state->curop = state->op + used;
+   }
+
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop[lenval] = '\0';
+   state->curop += lenval + 1;
+   state->sumlen += lenval + 1;
+}
+
+/*
+ * Operand callback used for morphological parsing (to_tsquery).
+ *
+ * The operand text is run through parsetext_v2 with the configuration
+ * selected by state->cfg_id.  Every resulting word is pushed as a VAL
+ * operand, and each word after the first is followed by an '&' operator
+ * so that all words of the operand are ANDed together.  When the parser
+ * yields no words at all, a VALTRUE placeholder is pushed instead.
+ * 'typeval' is not used.
+ *
+ * The loop header had been truncated to "for(count=0;count" (the text
+ * from '<' onwards was lost); it is restored here to iterate over
+ * prs.curwords entries.
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT         prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       /* join this word with the previous ones */
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }   
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 ) 
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+/* capacity of the pending-operator stack of one makepol invocation */
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens until END or a closing parenthesis and pushes operands and
+ * operators with pushval / pushquery.  Operators wait on a local stack
+ * until their right operand has been pushed.  A parenthesised group is
+ * handled by a recursive call.  Returns END; a syntax error is raised
+ * with elog(ERROR).
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* the operand completes every pending '&' and '!' on top of the stack */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               /* a '|' arriving while operators are pending is emitted at once */
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* parse the group up to its matching ')' */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               /* the group acts as an operand for one pending '&' or '!' */
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* end of this group: flush what is still pending */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush what is still pending */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/* Context handed to checkcondition_str while a query is executed against a tsvector */
+typedef struct
+{
+   WordEntry  *arrb;       /* first WordEntry of the tsvector */
+   WordEntry  *arre;       /* one past the last WordEntry */
+   char       *values;     /* string area of the tsvector */
+   char       *operand;    /* operand string area of the query */
+}  CHKVAL;
+
+/*
+ * Order a tsvector entry against a query operand: entries of different
+ * length are ordered by length (shorter first), entries of equal length
+ * by strncmp of their bytes.
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(chkval->values + ptr->pos,
+                  chkval->operand + item->distance,
+                  item->length);
+}
+
+/*
+ * check weight info
+ *
+ * Returns true when at least one position of the tsvector entry 'val'
+ * has a weight whose bit is set in the query operand's weight mask.
+ * The position data is read from the string area right after the
+ * short-aligned lexeme: a uint16 count followed by that many
+ * WordEntryPos items.
+ *
+ * The test expression had been truncated to "1<weight" (the text
+ * between '<' and '>' was lost); it is restored to "1<<ptr->weight".
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+
+   while (len--) {
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false; 
+}
+
+/*
+ * Does the tsvector described by 'checkval' (a CHKVAL) contain the
+ * operand 'val'?  Binary search over the WordEntry array using
+ * ValCompare.  When the operand carries a weight mask and the matching
+ * entry has positions, the weights are checked as well.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *ctx = (CHKVAL *) checkval;
+   WordEntry  *lo = ctx->arrb;
+   WordEntry  *hi = ctx->arre;
+
+   /* the match, if any, lies in the half-open range [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = ValCompare(ctx, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+       {
+           if (val->weight && mid->haspos)
+               return checkclass_str(ctx, mid, val);
+           return true;
+       }
+   }
+
+   return (false);
+}
+
+/*
+ * Evaluate the query subtree rooted at 'curitem'.
+ *
+ * Operands are decided by the 'chkcond' callback.  For an operator the
+ * right operand is the next item and the left operand is found at
+ * curitem + curitem->left.  '&' and '|' evaluate the left operand first
+ * and the right one only when it can still change the result.  When
+ * 'calcnot' is false a '!' node is treated as true without evaluating
+ * its operand.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   bool        leftres;
+
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       if (TS_execute(curitem + 1, checkval, calcnot, chkcond))
+           return false;
+       return true;
+   }
+
+   leftres = TS_execute(curitem + curitem->left, checkval, calcnot, chkcond);
+
+   if (curitem->val == (int4) '&')
+   {
+       if (!leftres)
+           return false;
+       return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+   }
+
+   /* |-operator */
+   if (leftres)
+       return true;
+   return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+}
+
+/*
+ * Match operator with its arguments in reverse order: forwards to
+ * exectsq with the two arguments swapped.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(0);
+   Datum       second = PG_GETARG_DATUM(1);
+
+   return DirectFunctionCall2(exectsq, second, first);
+}
+
+/*
+ * Match operator: does the tsvector (argument 0) satisfy the query
+ * (argument 1)?  An empty tsvector or an empty query never matches.
+ * NOT nodes are evaluated (calcnot is true).
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   bool        result = false;
+
+   if (val->size && query->size)
+   {
+       CHKVAL      chkval;
+
+       chkval.arrb = ARRPTR(val);
+       chkval.arre = chkval.arrb + val->size;
+       chkval.values = STRPTR(val);
+       chkval.operand = GETOPERAND(query);
+
+       result = TS_execute(GETQUERY(query), &chkval, true, checkcondition_str);
+   }
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * Walk the polish-notation array starting at ptr[*pos] and fill in the
+ * 'left' field of every item of that subtree: 0 for an operand, 1 for
+ * '!', and for a binary operator the distance from the operator to its
+ * left operand (the right operand directly follows the operator).
+ * On return *pos is the index just past the subtree.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+   ITEM       *node = ptr + *pos;
+   int4        start = *pos;
+
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+
+   (*pos)++;
+
+   if (node->type == VAL || node->type == VALTRUE)
+   {
+       node->left = 0;
+       return;
+   }
+
+   if (node->val == (int4) '!')
+   {
+       node->left = 1;
+       findoprnd(ptr, pos);
+       return;
+   }
+
+   /* binary operator: right subtree first, then the left one starts at *pos */
+   findoprnd(ptr, pos);
+   node->left = *pos - start;
+   findoprnd(ptr, pos);
+}
+
+
+/*
+ * input
+ *
+ * Parse the query string 'buf' into a palloc'ed QUERYTYPE.  'pushval' is
+ * the callback that stores an operand (pushval_asis or pushval_morph) and
+ * 'cfg_id' is the configuration id made available to it through the
+ * parser state.  Raises an error for an empty query.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   /* the list is walked from its head, i.e. most recently pushed node first */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * Input function of tsquery: the operands are stored exactly as
+ * written, without morphological processing.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   char       *input = (char *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *parsed = queryin(input, pushval_asis, 0);
+
+   PG_RETURN_POINTER(parsed);
+}
+
+/*
+ * out function
+ *
+ * INFIX is the working state used while a query is printed in infix form.
+ */
+typedef struct
+{
+   ITEM       *curpol;     /* next item of the polish notation to print */
+   char       *buf;        /* start of the output buffer */
+   char       *cur;        /* current write position in buf */
+   char       *op;         /* operand string area of the query */
+   int4        buflen;     /* allocated size of buf */
+}  INFIX;
+
+/*
+ * Double the output buffer of 'inf' until 'addsize' more bytes plus a
+ * terminator fit, keeping inf->cur at the same offset.
+ */
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the subtree starting at in->curpol into in->buf and leaves
+ * in->curpol just past that subtree.  'first' is true for the outermost
+ * expression (and directly after the "( " opened for a NOT); when it is
+ * false an OR expression is wrapped in parentheses.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       /* worst case: every char escaped, two quotes, ':' and four weight letters */
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           /* a quote inside the operand is preceded by a backslash */
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       /* an operator under NOT is printed in parentheses */
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm;
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       /* the right operand comes first in the array: print it into a scratch buffer */
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function of tsquery: prints the query in infix form.
+ * An empty query is printed as an empty string.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       out;
+
+   if (query->size == 0)
+   {
+       char       *empty = palloc(1);
+
+       empty[0] = '\0';
+       PG_RETURN_POINTER(empty);
+   }
+
+   out.buflen = 32;
+   out.buf = (char *) palloc(sizeof(char) * out.buflen);
+   out.cur = out.buf;
+   out.cur[0] = '\0';
+   out.curpol = GETQUERY(query);
+   out.op = GETOPERAND(query);
+
+   infix(&out, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(out.buf);
+}
+
+/*
+ * Debugging aid: show, as text, the query that is executed on non-leaf
+ * index pages, i.e. the query after clean_NOT_v2.  An empty query gives
+ * an empty text; when clean_NOT_v2 leaves nothing, the result is "T".
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   text       *res;
+   ITEM       *cleaned;
+   int4        len;
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       PG_RETURN_POINTER(res);
+   }
+
+   cleaned = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (cleaned)
+   {
+       INFIX       out;
+       int4        textlen;
+
+       out.buflen = 32;
+       out.buf = (char *) palloc(sizeof(char) * out.buflen);
+       out.cur = out.buf;
+       out.cur[0] = '\0';
+       out.curpol = cleaned;
+       out.op = GETOPERAND(query);
+       infix(&out, true);
+
+       /* copy the printed form, without its terminator, into a text value */
+       textlen = out.cur - out.buf;
+       res = (text *) palloc(textlen + VARHDRSZ);
+       VARATT_SIZEP(res) = textlen + VARHDRSZ;
+       strncpy(VARDATA(res), out.buf, textlen);
+       pfree(cleaned);
+   }
+   else
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * to_tsquery(cfg_id, text): parse the text with morphological
+ * processing under the given configuration id, then pass the item
+ * array through clean_fakeval_v2.  When nothing is left the query is
+ * returned empty (size 0); otherwise the cleaned items are copied back
+ * over the start of the item array.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text       *in = PG_GETARG_TEXT_P(1);
+   char       *cstr;
+   QUERYTYPE  *query;
+   ITEM       *cleaned;
+   int4        nitems;
+
+   cstr = text2char(in);
+   PG_FREE_IF_COPY(in,1);
+
+   query = queryin(cstr, pushval_morph, PG_GETARG_INT32(0));
+   cleaned = clean_fakeval_v2(GETQUERY(query), &nitems);
+
+   if (cleaned)
+   {
+       memcpy((void *) GETQUERY(query), (void *) cleaned, nitems * sizeof(ITEM));
+       pfree(cleaned);
+   }
+   else
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+   }
+
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * to_tsquery(cfg_name, text): resolve the configuration name to its id
+ * and forward to to_tsquery.
+ *
+ * 'name' is fetched from argument 0, so it must be released with
+ * PG_FREE_IF_COPY(name, 0).  The original passed index 1, which compared
+ * 'name' with the unrelated second argument and therefore always
+ * pfree'd it, even when it was the caller's own datum, not a copy.
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+   
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsquery(text): forward to to_tsquery using the id returned by
+ * get_currcfg().
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum       cfg = Int32GetDatum( get_currcfg() );
+   Datum       res = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));
+
+   PG_RETURN_DATUM(res);
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * item in polish notation with back link
+ * to left operand
+ *
+ * type   - one of the type codes defined below (VAL, OPR, VALTRUE, ...)
+ * weight - NOTE(review): not referenced in this header; presumably a
+ *          weight restriction on the operand - confirm against query.c
+ * left   - for a binary operator, offset (in items) from the operator to
+ *          its left operand; the right operand is the next item
+ * val    - for operators, the operator character ('!', '&', '|')
+ * length / distance - length of the operand string and its offset from
+ *          the start of the operand area (see GETOPERAND)
+ */
+typedef struct ITEM
+{
+   int8        type;
+   int8        weight;
+   int2        left;
+   int4        val;
+   /* user-friendly value, must correlate with WordEntry */
+   uint32  
+       unused:1,
+       length:11,
+       distance:20;
+}  ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ *
+ * size is the number of ITEMs (GETOPERAND skips size * sizeof(ITEM)
+ * bytes to reach the operand strings).
+ * NOTE(review): len is presumably the total byte length of the value
+ * (varlena-style header) - confirm.
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  QUERYTYPE;
+
+/*
+ * Layout helpers for QUERYTYPE.  Every macro argument and every macro
+ * result is fully parenthesized so that the macros can be used with
+ * arbitrary expressions and followed by postfix operators, e.g.
+ * GETQUERY(q)[i] or COMPUTESIZE(n + 1, len).
+ */
+/* size of the fixed header: the len and size fields */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+/* total bytes needed for 'size' items plus 'lenofoperand' operand bytes */
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+/* pointer to the first ITEM of query x */
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+/* pointer to the operand string area that follows the ITEM array */
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+/* true when character x is one of the query operator/paren characters */
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+/*
+ * Type codes.  VAL, OPR and VALTRUE are compared against ITEM.type by the
+ * ranking and rewrite code.
+ * NOTE(review): END, ERR, OPEN and CLOSE are presumably parser token
+ * codes used by query.c - confirm.
+ */
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+/*
+ * Evaluate the query starting at curitem.  chkcond is called with
+ * checkval and an ITEM and returns whether that item matches.
+ * NOTE(review): calcnot presumably selects whether '!' is really
+ * evaluated or treated as always true - confirm in query.c.
+ */
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevance ranking
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+/* fmgr (version 1) registration and prototypes of the SQL-callable functions */
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+/*
+ * Default per-weight-class multipliers: used directly by rank_def() and
+ * as the fallback for negative entries of the user array in rank().
+ */
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+/*
+ * Multiplier for the position entry wep.  Indexes a variable named 'w'
+ * that must be in scope at the point of use.
+ */
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+/* normalization method used when the caller does not pass one */
+#define DEF_NORM_METHOD    0
+
+/*
+ * Weight of a pair of words that are w positions apart: a fixed tiny
+ * value beyond 100 positions, otherwise a value that decreases as the
+ * distance grows.
+ */
+static float4 word_distance ( int4 w ) {
+   return ( w>100 ) ? 1e-30 : 1.0/(1.005+0.05*exp( ((float4)w)/1.5-2) );
+}
+
+/*
+ * Length of the document for normalization: the sum over all entries of
+ * the number of stored positions, counting an entry without positions
+ * as one.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry   *entry;
+   WordEntry   *stop=(WordEntry*)STRPTR(t);
+   int total = 0;
+
+   for(entry=ARRPTR(t); entry < stop; entry++) {
+       int npos=POSDATALEN(t, entry);
+
+       total += ( npos == 0 ) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Compare a tsvector entry with a query operand: shorter strings sort
+ * first, strings of equal length are ordered by strncmp().
+ * eval / qval are the bases of the respective string areas.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+        if (ptr->len != item->length)
+                return (ptr->len > item->length) ? 1 : -1;
+
+        return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary search of the entry array of t for the operand of item.
+ * Returns the matching entry, or NULL when there is none.  The search
+ * relies on the entries being ordered as WordECompareITEM() orders them.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+        WordEntry  *lo = ARRPTR(t);
+        WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+        /* the candidate, if present, is always inside [lo, hi) */
+        while (lo < hi)
+        {
+                WordEntry  *mid = lo + (hi - lo) / 2;
+                int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+                if (cmp > 0)
+                        hi = mid;
+                else if (cmp < 0)
+                        lo = mid + 1;
+                else
+                        return mid;
+        }
+
+        return NULL;
+}
+
+/*
+ * Stand-in position list for entries that carry no position data.
+ * Callers overwrite the first element with a uint16 count of
+ * lengthof(POSNULL)-1 and then treat POSNULL+1 as the position array, so
+ * it mimics a length-prefixed list holding one element.
+ * NOTE(review): the initializers presumably are {weight, pos}, giving one
+ * position at MAXENTRYPOS-1 - confirm against WordEntryPos in tsvector.h.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank for a query whose top-level operator is '&'.
+ *
+ * For every pair of distinct query operands found in t, every pair of
+ * their positions contributes sqrt(w1 * w2 * word_distance(dist)); the
+ * contributions are combined as 1 - (1 - res) * (1 - curw).
+ *
+ * w is the weight table indexed through wpos().  Returns a negative
+ * value when no pair contributed.
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* pos[i] stays NULL for operators and for operands missing from t */
+   memset(pos,0,sizeof(uint16*) * q->size);
+   /* make POSNULL look like a length-prefixed position list */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+
+       /* first uint16 is the count, the positions follow it */
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       /* pair the current operand with every earlier operand that was found */
+       for( k=0; k<i; k++ ) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   /* a zero distance only counts when one side has no real positions */
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw;
+                       if ( !dist ) dist=MAXENTRYPOS;
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res;
+}
+
+/*
+ * Rank for a query whose top-level operator is not '&'.
+ *
+ * Every position of every query operand found in t contributes its
+ * weight wpos(); contributions are combined as
+ * 1 - (1 - res) * (1 - weight).  Returns a negative value when no
+ * operand was found.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* make POSNULL look like a length-prefixed position list */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           /* no stored positions: use the stand-in list */
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Compute the rank of document t for query q.
+ *
+ * w      - weight table used through wpos()
+ * method - normalization: 0 none, 1 divide by log(document length),
+ *          2 divide by document length; anything else raises an error
+ *
+ * Returns 0 when either the document or the query is empty.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+   int len;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   /* the first item is the top of the query tree */
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   /* nothing matched: report a tiny positive rank */
+   if ( res < 0 )
+       res = 1e-20;
+
+        switch(method) {
+       case 0: break;
+       case 1:
+           /*
+            * log(1) is 0, so a document of length 1 would produce an
+            * infinite rank; leave such documents unnormalized.
+            */
+           len = cnt_length(t);
+           if ( len > 1 )
+               res /= log((float)len);
+           break;
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+        }
+
+   return res;
+}
+
+/*
+ * rank(weights float4[], txt tsvector, query tsquery [, method int4])
+ *
+ * The weight array must be one-dimensional with at least
+ * lengthof(weights) elements.  A negative element selects the built-in
+ * default for that slot; an element above 1.0 is an error.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 )
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+        elog(ERROR,"Array of weight is too short");
+
+   /* build the effective weight table, falling back to the defaults */
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 )
+           elog(ERROR,"Weight out of range");
+   }
+
+   /* the normalization method is optional */
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method);
+
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank(txt tsvector, query tsquery [, method int4])
+ *
+ * Ranks with the built-in default weights; the normalization method
+ * defaults to DEF_NORM_METHOD when the third argument is absent.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int method=( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   float score=calc_rank(weights, txt, query, method);
+
+   PG_FREE_IF_COPY(txt, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_FLOAT4(score);
+}
+
+
+/* one occurrence of a query operand in the document */
+typedef struct {
+   ITEM    *item;  /* the query operand that matched */
+   int32   pos;    /* position of the occurrence in the document */
+} DocRepresentation;
+
+/*
+ * qsort comparator ordering DocRepresentation by ascending position.
+ * Equal positions compare as equal (0) so that the comparator is
+ * consistent, as qsort requires.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+/* a window of the document representation handed to TS_execute() */
+typedef struct {
+   DocRepresentation *doc;
+   int len;
+}  ChkDocR;
+
+/*
+ * TS_execute() callback: true when operand val occurs anywhere in the
+ * window described by checkval (a ChkDocR).
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *window = (ChkDocR*)checkval;
+   int idx;
+
+   for(idx=0; idx < window->len; idx++)
+       if ( window->doc[idx].item == val )
+           return true;
+
+   return false;
+}
+
+
+/*
+ * Find the next cover: a stretch of the document representation that
+ * satisfies the query.
+ *
+ * doc / len - position-sorted document representation and its length
+ * query     - the query being matched
+ * pos       - in/out: index in doc where the search starts; advanced
+ *             past the start of the candidate that was examined
+ * p, q      - out: first and last document position of the cover; on
+ *             entry *q holds the end of the previously examined cover
+ *
+ * Returns true when a cover was found, false when the document is
+ * exhausted.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   /* f is only read after the backward scan below has assigned it */
+   DocRepresentation   *ptr,*f=NULL;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* forward scan: *q becomes the largest "first occurrence" position */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               }
+               break;
+           }
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   }
+
+   /* backward scan from lastpos: *p becomes the smallest "last occurrence" */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+
+   if ( *p<=*q ) {
+       /* verify that the window [f, doc+lastpos] really satisfies the query */
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q);
+   }
+
+   return false;
+}
+
+/*
+ * Build the document representation of txt for query: one
+ * DocRepresentation per position of every query operand found in txt,
+ * sorted by position.
+ *
+ * *doclen receives the number of elements.  Returns a palloc'd array, or
+ * NULL (with *doclen == 0) when no operand occurs in txt.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   /* make POSNULL look like a length-prefixed position list */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           /* no stored positions: use the stand-in list */
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until the new positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+
+   if ( cur>0 ) {
+       if ( cur>1 )
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd(K int4, txt tsvector, query tsquery [, method int4])
+ *
+ * Cover-density ranking: every cover found by Cover() adds 1 when it
+ * spans at most K positions and K / span otherwise.  K <= 0 selects the
+ * default of 4.  The result is then normalized by method: 0 none,
+ * 1 divide by log(document length), 2 divide by document length.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len,cur;
+   int doclen;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       /* none of the query operands occurs in the document */
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;
+   while( Cover(doc, len, query, &cur, &p, &q) )
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+        switch(method) {
+       case 0: break;
+       case 1:
+           /*
+            * log(1) is 0, so a document of length 1 would produce an
+            * infinite (or NaN) rank; leave such documents unnormalized.
+            */
+           doclen = cnt_length(txt);
+           if ( doclen > 1 )
+               res /= log((float)doclen);
+           break;
+       case 2: res /= (float)cnt_length(txt); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+        }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd(txt tsvector, query tsquery [, method int4])
+ *
+ * Calls rank_cd() with K = -1 (which makes it use its default) and the
+ * given or default normalization method.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum method;
+
+   if ( PG_NARGS() == 3 )
+       method = PG_GETARG_DATUM(2);
+   else
+       method = Int32GetDatum(DEF_NORM_METHOD);
+
+   PG_RETURN_DATUM( DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   ));
+}
+
+/**************debug*************/
+
+/* one word occurrence of the document, used by get_covers() */
+typedef struct {
+   char    *w;         /* word text (not NUL-terminated) */
+   int2    len;        /* length of the word text */
+   int2    pos;        /* position in the document */
+   int2    start;      /* number of the cover starting here, 0 if none */
+   int2    finish;     /* number of the cover ending here, 0 if none */
+} DocWord;
+
+/*
+ * qsort comparator ordering DocWord by ascending position.  Equal
+ * positions compare as equal (0) so that the comparator is consistent,
+ * as qsort requires.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers(txt tsvector, query tsquery) returns text
+ *
+ * Debug helper: prints the words of txt in position order and brackets
+ * every cover found by Cover() as "{N ... }N", N being the cover number.
+ * Returns an empty text when no query operand occurs in txt; raises an
+ * error when an entry of txt has no position information.
+ */
+Datum
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences of the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+        dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;
+           dw[cur].len=pptr[i].len;
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark the first and last word of every cover */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* test the bound first so dwptr is never read past the array */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++;
+   }
+
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   /* emit the words, with cover markers around marked words */
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) {
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/*
+ * Node of the query tree built by maketree().  valnode points at the
+ * ITEM the node was built from; a '!' operator node only has a right
+ * child, value nodes have no children.
+ */
+typedef struct NODE
+{
+   struct NODE *left;
+   struct NODE *right;
+   ITEM       *valnode;
+}  NODE;
+
+/*
+ * Build a NODE tree from the flat (polish notation) item array starting
+ * at 'in'.  For an operator the right operand is the following item;
+ * for binary operators the left operand is in->left items further on.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *n = (NODE *) palloc(sizeof(NODE));
+
+   n->left = NULL;
+   n->right = NULL;
+   n->valnode = in;
+
+   /* anything but an operator is a leaf */
+   if (in->type != OPR)
+       return n;
+
+   n->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       n->left = maketree(in + in->left);
+   return n;
+}
+
+/* growable output buffer used while flattening a tree */
+typedef struct
+{
+   ITEM       *ptr;    /* item array being filled */
+   int4        len;    /* allocated number of items */
+   int4        cur;    /* number of items written so far */
+}  PLAINTREE;
+
+/*
+ * Append node and its subtree to state in polish notation (operator,
+ * right subtree, left subtree), recomputing the 'left' offsets, and free
+ * the NODEs as they are consumed.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   /* double the buffer when it is full */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+   memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM));
+   if (node->valnode->type == VAL)
+       state->cur++;
+   else if (node->valnode->val == (int4) '!')
+   {
+       /* unary operator: only a right operand, which follows directly */
+       state->ptr[state->cur].left = 1;
+       state->cur++;
+       plainnode(state, node->right);
+   }
+   else
+   {
+       /* remember the operator's slot: its 'left' is known only after the right subtree is written */
+       int4        cur = state->cur;
+
+       state->cur++;
+       plainnode(state, node->right);
+       state->ptr[cur].left = state->cur - cur;
+       plainnode(state, node->left);
+   }
+   pfree(node);
+}
+
+/*
+ * Flatten the tree rooted at root into a newly palloc'd item array.
+ * *len receives the number of items.  Returns NULL (and *len == 0) when
+ * root is NULL or is neither a value nor an operator.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   state;
+
+   state.ptr = NULL;
+   state.cur = 0;
+   state.len = 16;
+
+   if (root != NULL &&
+       (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       state.ptr = (ITEM *) palloc(state.len * sizeof(ITEM));
+       plainnode(&state, root);
+   }
+
+   *len = state.cur;
+   return state.ptr;
+}
+
+/*
+ * Release node and everything below it; a NULL node is ignored.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Remove the ! operator from the tree.
+ * A negated subtree is treated as always TRUE: an '|' with such an
+ * operand is dropped as a whole, an '&' keeps only its other operand.
+ * The result is useful for debugging and is the view used when
+ * searching in an index.
+ * Returns the cleaned subtree, or NULL when nothing is left; dropped
+ * nodes are freed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* either side always TRUE makes the whole OR always TRUE */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           /* only the right operand survived: it replaces the AND */
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           /* only the left operand survived: it replaces the AND */
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a newly built item array equal to the query at ptr with all
+ * '!' subtrees removed; *len receives its length.  The result is NULL
+ * when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *cleaned = clean_NOT_intree(maketree(ptr));
+
+   return plaintree(cleaned, len);
+}
+
+/* outcome of a subtree that was removed: unknown, always true, always false */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Clean query tree from values which is always in
+ * text (stopword)
+ *
+ * Returns the cleaned subtree.  When the subtree collapses to a constant
+ * the function returns NULL and stores V_TRUE or V_FALSE in *result;
+ * *result is left untouched otherwise.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* the operand became a constant: the negation is the opposite constant */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           /* FALSE | x is x */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           /* x | FALSE is x */
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       /* any other operator is handled as '&' */
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           /* TRUE & x is x */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           /* x & TRUE is x */
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a newly built item array equal to the query at ptr with the
+ * always-true (stop word) values removed; *len receives its length.
+ * When the whole query collapses to a constant a NOTICE is emitted,
+ * *len is set to 0 and NULL is returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &verdict);
+
+   if (verdict == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/*
+ * Both functions take the first ITEM of a query, return a newly built
+ * item array (or NULL when nothing is left) and store its length in *len.
+ */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/* qsort/bsearch comparator: orders SNMapEntry by key using strcmp() */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *lhs = (const SNMapEntry*)a;
+   const SNMapEntry *rhs = (const SNMapEntry*)b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add (key, value) to the map.  The key is duplicated with strdup(); the
+ * entry array grows by doubling (starting at 16) and is kept sorted by
+ * key after every insertion.  Raises an error when memory runs out.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if (map->len>=map->reallen) {
+       int newcap = (map->reallen) ? 2*map->reallen : 16;
+       SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newcap);
+
+       if ( grown == NULL )
+           elog(ERROR, "No memory");
+       map->list=grown;
+       map->reallen=newcap;
+   }
+
+   slot = &map->list[ map->len ];
+   slot->key = strdup(key);
+   if ( slot->key == NULL )
+       elog(ERROR, "No memory");
+   slot->value=value;
+   map->len++;
+
+   /* keep the list ordered for findSNMap() */
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/* addSNMap() for a text key: converts it with text2char() first */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey=text2char( key );
+
+   addSNMap(map, ckey, value);
+   pfree(ckey);
+}
+
+/*
+ * Look key up in the map with a binary search.
+ * Returns the stored value, or 0 when the map is empty or has no such key.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if ( map->len==0 || !map->list )
+       return 0;
+
+   probe.key = key;
+   probe.value = 0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( hit == NULL )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * findSNMap() for a text key: converts it with text2char() first.
+ * Returns the stored value, or 0 when there is no such key.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* same type as the value returned by findSNMap() */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release every key and the entry array, then reset the map to the
+ * all-zero (empty) state.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *entry;
+
+       for( entry=map->list; map->len; entry++, map->len-- )
+           if ( entry->key ) free(entry->key);
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* one map entry: a string key and the Oid stored for it */
+typedef struct {
+   char    *key;
+   Oid value;
+} SNMapEntry;
+
+/* map from string to Oid, stored as an array of entries */
+typedef struct {
+   int len;                /* number of entries in use */
+   int reallen;            /* number of entries allocated */
+   SNMapEntry  *list;      /* the entries */
+} SNMap;
+
+/* add an entry; the _t variant takes the key as text */
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+/* look a key up; the _t variant takes the key as text */
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+/* release everything the map holds */
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a stemmer environment with S_size string slots, I_size
+ * integers and B_size booleans (each may be 0).
+ *
+ * Returns the new environment, or NULL when an allocation fails; in that
+ * case everything allocated so far is released again.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    /* the *_size fields are only set after a successful allocation,
+       so they tell exactly what has to be released */
+    if (z->S_size)
+    {   int i;
+        for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
+        free(z->S);
+    }
+    if (z->I_size) free(z->I);
+    if (z->B_size) free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+    return NULL;
+}
+
+/*
+ * Release an environment created by SN_create_env(): the string slots,
+ * the integer and boolean arrays, the current string and the
+ * environment itself.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    int i;
+
+    if (z->S_size)
+    {
+        for (i = 0; i < z->S_size; i++)
+            lose_s(z->S[i]);
+        free(z->S);
+    }
+    if (z->I_size)
+        free(z->I);
+    if (z->B_size)
+        free(z->B);
+    if (z->p)
+        lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Make the 'size' symbols at s the current string of z (replacing the
+ * range 0 .. z->l) and move the cursor to its start.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+
+    /* start processing at the first symbol */
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+/* the character type the stemmers operate on */
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/*
+ * Stemmer environment.
+ * p is the current string, c the cursor and l the limit used by
+ * SN_set_current(); S, I and B are arrays of S_size strings, I_size
+ * integers and B_size symbols allocated by SN_create_env().
+ * NOTE(review): a, lb, bra and ket are presumably further cursor/limit
+ * marks of the Snowball runtime - confirm in utilities.c.
+ */
+struct SN_env {
+    symbol * p;
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;
+    symbol * * S;
+    int * I;
+    symbol * B;
+};
+
+/* allocate / release an environment */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+/* load the 'size' symbols at s as the string to be processed */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Forward declarations. english_stem() is the exported entry point; the
+   r_* routines are file-local steps that it (or another step) calls. */
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+/* Environment constructor / destructor for this stemmer (defined at the end
+   of the file). */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/*
+ * a_0: one-entry table holding the string "gener"; r_mark_regions() searches
+ * it forwards with find_among(z, a_0, 1).
+ *
+ * Initializers follow the field order of struct among:
+ * { s_size, s, substring_i, result, function }.
+ */
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+/*
+ * a_1: endings searched backwards by r_Step_1a() via find_among_b(z, a_1, 6).
+ * The `result` field (4th initializer) selects the case taken in
+ * r_Step_1a()'s switch; -1 matches none of its cases.
+ */
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+/*
+ * a_2: endings examined by r_Step_1b() (find_among_b(z, a_2, 13)) after it
+ * has deleted a suffix. Entry 0 is the empty string (length 0, null string
+ * pointer) with result 3; the other results are 1 or 2 and pick the
+ * corresponding case of the inner switch in r_Step_1b().
+ */
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+/*
+ * a_3: suffixes searched backwards by r_Step_1b() via
+ * find_among_b(z, a_3, 6). Result 1 leads to the "ee" replacement case,
+ * result 2 to the deletion case of r_Step_1b()'s outer switch.
+ */
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+/*
+ * a_4: suffixes searched backwards by r_Step_2() via
+ * find_among_b(z, a_4, 24). The `result` field (1..16) selects the
+ * replacement or deletion case in r_Step_2()'s switch.
+ */
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+/*
+ * a_5: suffixes searched backwards by r_Step_3() via
+ * find_among_b(z, a_5, 9). The `result` field (1..6) selects the case in
+ * r_Step_3()'s switch.
+ */
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+/*
+ * a_6: suffixes searched backwards by r_Step_4() via
+ * find_among_b(z, a_6, 18). Every entry has result 1 (plain deletion in
+ * r_Step_4()) except "ion", whose result 2 selects the case that first
+ * checks the preceding character.
+ */
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+/*
+ * a_7: the two single-character endings handled by r_Step_5()
+ * (find_among_b(z, a_7, 2)): 'e' gives result 1, 'l' gives result 2.
+ */
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+/*
+ * a_8: words tested by r_exception2() via find_among_b(z, a_8, 8). When one
+ * of them matches and the match reaches the backward limit, english_stem()
+ * skips Step_1b through Step_5. All results are -1: only match / no match
+ * is used.
+ */
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+/*
+ * a_9: whole words searched forwards by r_exception1() via
+ * find_among(z, a_9, 18). Results 1..11 select a fixed replacement in
+ * r_exception1()'s switch; entries with result -1 match none of its cases,
+ * so the word is accepted as it stands.
+ */
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+/*
+ * Character-class tables handed to in_grouping*() / out_grouping*() together
+ * with a [min, max] character-code range: g_v is used with 97..121,
+ * g_v_WXY with 89..121 and g_valid_LI with 99..116.
+ *
+ * NOTE(review): the tables look like bitmaps indexed from `min`, least
+ * significant bit first; read that way g_v is {a e i o u y}, g_v_WXY is
+ * {Y a e i o u w x y} and g_valid_LI is {c d e g h k m n r t}. The decoding
+ * routine is not part of this file -- confirm against in_grouping().
+ */
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+/*
+ * Literal symbol strings used by the routines below as arguments to eq_s(),
+ * eq_s_b(), slice_from_s() and insert_s(). They carry no terminator: every
+ * call site passes the length explicitly. Several literals have identical
+ * contents; each use in the generated code gets its own array.
+ */
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/*
+ * Prelude: mark selected 'y' characters by rewriting them as 'Y'.
+ *
+ * Clears the Y_found flag (z->B[0]). First, if the text at the cursor is
+ * 'y' followed by a g_v character, that 'y' becomes 'Y'. Then, repeatedly,
+ * the next position where a g_v character is followed by 'y' is located and
+ * that 'y' becomes 'Y'. Every replacement sets Y_found. The cursor is
+ * restored before returning; the function always returns 1.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                /* no match here: give up at the limit, else try one further on */
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    /* lab1 is emitted by the generator but no goto in this function targets it */
+    lab1:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Compute the two region marks p1 (z->I[0]) and p2 (z->I[1]).
+ *
+ * Both marks start out at the limit z->l. p1 is set to the cursor position
+ * reached either by matching a_0 ("gener") or by going past the first g_v
+ * character and then past the first non-g_v character. p2 is set after
+ * going past one more g_v character and one more non-g_v character. If the
+ * limit is hit during any of these scans the remaining mark(s) keep the
+ * value z->l. The cursor is restored; the function always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Backward test used by Step_1b and Step_5 ("shortv").
+ *
+ * Returns 1 when, scanning backwards from the cursor, either
+ *   - a character outside g_v_WXY, then one inside g_v, then one outside
+ *     g_v are found, or
+ *   - a character outside g_v, then one inside g_v are found and the cursor
+ *     has then reached the backward limit z->lb.
+ * Returns 0 otherwise. The cursor is not restored here; callers save and
+ * restore it around the call.
+ */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* R1 test: yields 1 when the cursor is at or beyond the p1 mark stored in
+   z->I[0], and 0 when it lies before that mark. */
+static int r_R1(struct SN_env * z) {
+    return (z->I[0] <= z->c) ? 1 : 0;
+}
+
+/* R2 test: yields 1 when the cursor is at or beyond the p2 mark stored in
+   z->I[1], and 0 when it lies before that mark. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/*
+ * Step 1a: handle the endings listed in a_1 (searched backwards).
+ *
+ * Returns 0 when no ending matches. Otherwise, by table result:
+ *   1  replace the ending with "ss";
+ *   2  replace the ending with "ie" when exactly one character precedes it
+ *      (stepping back once lands on the backward limit), else with "i";
+ *   3  require at least one preceding character, step over it, then require
+ *      a g_v character somewhere before that; delete the ending. Returns 0
+ *      if either requirement fails.
+ * Endings whose result is -1 match no case: the word is left unchanged and
+ * 1 is returned.
+ */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1b: handle the endings listed in a_3 (searched backwards).
+ *
+ * Returns 0 when no ending matches. Otherwise, by table result:
+ *   1  if the ending lies in R1, replace it with "ee" (else return 0);
+ *   2  require a g_v character before the ending (else return 0), delete
+ *      the ending, then look at what the word now ends in using a_2:
+ *        1  insert "e" at the cursor;
+ *        2  delete the character just before the cursor;
+ *        3  if the cursor sits exactly on the p1 mark (z->I[0]) and
+ *           r_shortv() succeeds, insert "e"; otherwise return 0.
+ * Returns 1 after a successful rewrite.
+ */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1c: replace a final 'y' or 'Y' with 'i'.
+ *
+ * The replacement happens only when the character before it is outside g_v
+ * and that character is not the first one of the word (the cursor must not
+ * have reached the backward limit after stepping over it). Returns 1 when
+ * the replacement was made and 0 otherwise.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/*
+ * Step 2: rewrite the suffixes listed in a_4 (searched backwards).
+ *
+ * Returns 0 when no suffix matches or when the match does not lie in R1.
+ * Otherwise the table result selects a fixed replacement string (cases
+ * 1..12, 14, 15) or one of two conditional cases:
+ *   13  requires an 'l' before the suffix, then replaces it with "og";
+ *   16  requires a g_valid_LI character before the suffix, then deletes it.
+ * Returns 1 after a rewrite, 0 if a conditional case fails.
+ */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 3: rewrite the suffixes listed in a_5 (searched backwards).
+ *
+ * Returns 0 when no suffix matches or when the match does not lie in R1.
+ * Results 1..4 replace the suffix with "tion", "ate", "al" or "ic"
+ * respectively; result 5 deletes it; result 6 deletes it only when it also
+ * lies in R2 (otherwise returns 0). Returns 1 after a rewrite.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 4: delete the suffixes listed in a_6 (searched backwards).
+ *
+ * Returns 0 when no suffix matches or when the match does not lie in R2.
+ * Result 1 deletes the suffix outright; result 2 ("ion") deletes it only
+ * when the preceding character is 's' or 't', returning 0 otherwise.
+ * Returns 1 after a deletion.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 5: remove a final 'e' or 'l' (table a_7, searched backwards).
+ *
+ * Returns 0 when neither matches. Otherwise:
+ *   1 ('e')  delete it when it lies in R2, or when it lies in R1 and
+ *            r_shortv() fails for the text before it; else return 0;
+ *   2 ('l')  delete it when it lies in R2 and is preceded by another 'l';
+ *            else return 0.
+ * Returns 1 after a deletion.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Second exception list: returns 1 when one of the words in a_8 matches
+ * backwards from the cursor and the match extends all the way to the
+ * backward limit z->lb; returns 0 otherwise. Nothing is rewritten.
+ */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (!(find_among_b(z, a_8, 8))) return 0; /* substring, line 147 */
+    z->bra = z->c; /* ], line 147 */
+    if (z->c > z->lb) return 0; /* atlimit, line 147 */
+    return 1;
+}
+
+/*
+ * First exception list: whole words with a fixed stem (table a_9, searched
+ * forwards).
+ *
+ * Returns 0 when no entry matches or when the match stops short of the
+ * limit z->l. Otherwise results 1..11 replace the word with the literal
+ * given in the corresponding case; entries with result -1 match no case
+ * and leave the word as it is. Returns 1 in both of those situations.
+ */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Postlude: undo the marking done by r_prelude().
+ *
+ * Returns 0 immediately when the Y_found flag (z->B[0]) is not set.
+ * Otherwise every 'Y' from the cursor onwards is located in turn and
+ * replaced with 'y'; the function then returns 1.
+ */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            /* no 'Y' here: stop at the limit, else look one position on */
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+/*
+ * Exported entry point: stem the word held in z, in place.
+ *
+ * Order of work:
+ *   1. r_exception1(): if it accepts the word, nothing else is done.
+ *   2. Otherwise, if fewer than 3 symbols lie between the cursor and the
+ *      limit, return 0 without touching the word.
+ *   3. r_prelude() and r_mark_regions(), each with the cursor restored
+ *      afterwards.
+ *   4. Switch to backward mode (lb = old cursor, cursor = limit) and run
+ *      r_Step_1a(); then, unless r_exception2() matches, run r_Step_1b(),
+ *      r_Step_1c(), r_Step_2(), r_Step_3(), r_Step_4() and r_Step_5().
+ *      A step that returns 0 is simply skipped over; the cursor is reset to
+ *      its saved position after each one.
+ *   5. Return to forward mode (cursor = lb) and run r_postlude().
+ *
+ * Returns 1 on every path except the short-word exit in step 2.
+ */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/* Create the environment used by english_stem(): no string slots, two
+   integer slots (the p1/p2 marks in z->I[0..1]) and one boolean slot
+   (Y_found in z->B[0]). */
+extern struct SN_env * english_create_env(void)
+{
+    return SN_create_env(0, 2, 1);
+}
+
+/* Release an environment obtained from english_create_env(); simply
+   forwards to SN_close_env(). */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create / release the environment that english_stem() operates on. */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/* Stem the word currently held in z, in place. */
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>   /* INT_MAX and INT_MIN, needed by MAXINT / MININT */
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Size in bytes of the two ints stored in front of a symbol string.
+   Parenthesized so the macro expands safely inside any expression. */
+#define HEAD (2*sizeof(int))
+
+/* Accessors for the ints stored immediately before the symbol data `p`:
+   the int at index -1 is the size, the one at index -2 the capacity. */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) ((int *)(p))[-1] = (n)
+#define CAPACITY(p)    ((int *)(p))[-2]
+
+/* One entry of a search table passed to find_among() / find_among_b(). */
+struct among
+{
+    /* number of chars in string */
+    int s_size;
+    /* search string */
+    symbol * s;
+    /* index to longest matching substring */
+    int substring_i;
+    /* result of the lookup */
+    int result;
+    /* optional routine attached to the entry; the tables visible in
+       english_stem.c all leave it 0 */
+    int (* function)(struct SN_env *);
+};
+
+/* Symbol-string lifetime: SN_create_env() fills its string slots with
+   create_s(); SN_close_env() disposes of strings with lose_s(). */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+/* Character-class tests. `s` is a grouping table (e.g. g_v in
+   english_stem.c) and min/max bound the character codes it covers. The
+   generated stemmers call the _b variants while scanning backwards. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+/* Range tests; presumably like the grouping tests but against a plain
+   [min, max] interval -- TODO confirm against the implementation. */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+/* Literal comparison at the cursor: the _s forms take an explicit length
+   and symbol array, the _v forms take a symbol string. _b = backwards. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+/* Table search over v_size entries of v. Callers treat a return of 0 as
+   "no match" and otherwise switch on the value, which corresponds to the
+   matching entry's `result` field. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+/* String editing. The slice_* routines act on the span marked by z->bra
+   and z->ket; replace_s() is given its bounds explicitly. */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+/* Insertion between positions bra and ket; the generated code passes the
+   cursor for both to insert at the cursor. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+/* NOTE(review): these look like "copy current slice / whole string into p"
+   helpers; neither is used by the code visible here -- confirm. */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+/* Debugging aid; not called by the code visible here. */
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+/* Forward declarations: the exported entry point followed by the static
+ * routines it is built from. */
+extern int russian_stem(struct SN_env * z);
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+/* Creation and release of the environment this stemmer works on. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+/* a_0: ending table searched backwards by r_perfective_gerund().
+ * Each among entry is { length, symbols, substring_i, result, function }:
+ * substring_i is the index of the entry to try next when this one is not
+ * accepted (-1 = none) and function is an optional extra test (0 = none).
+ * Result 1 endings are deleted only when preceded by s_0 or s_1; result 2
+ * endings are deleted unconditionally.
+ * NOTE(review): the byte values look like KOI8-R Cyrillic -- confirm. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+/* a_1: ending table searched backwards by r_adjective().  Every entry has
+ * result 1, for which the matched ending is deleted. */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+/* a_2: ending table searched backwards by r_adjectival() after an adjective
+ * ending has been removed.  Result 1 endings are deleted only when preceded
+ * by s_2 or s_3; result 2 endings are deleted unconditionally. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+/* a_3: the two endings searched backwards by r_reflexive(); both have
+ * result 1, for which the matched ending is deleted. */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+/* a_4: ending table searched backwards by r_verb().  Result 1 endings are
+ * deleted only when preceded by s_4 or s_5; result 2 endings are deleted
+ * unconditionally. */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+/* a_5: ending table searched backwards by r_noun().  Every entry has
+ * result 1, for which the matched ending is deleted. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+/* a_6: the two endings searched backwards by r_derivational(); a match is
+ * deleted only if the r_R2() test also succeeds. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+/* a_7: ending table searched backwards by r_tidy_up().  The result code
+ * (1, 2 or 3) selects which of the three clean-up actions is applied. */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+/* g_v: membership bitmap handed to in_grouping()/out_grouping() together
+ * with the range 192..220, so bit (ch - 192) is set for each member.
+ * NOTE(review): presumably the vowel set of the stemmer -- confirm. */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+/* One-symbol literals compared with eq_s_b() by the routines below.  Several
+ * hold the same value; presumably the generator emits one per use. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+/* Compute the two region marks and store them in z->I.
+ * Both start out as z->l.  Scanning forwards from the cursor, I[0] (pV) is
+ * set just past the first character accepted by in_grouping() for g_v.
+ * I[1] (p2) is set after a further non-member, member, non-member sequence
+ * has been passed.  If the text runs out first, the marks not yet set keep
+ * the value z->l.  The cursor is restored before returning; the function
+ * always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 100 */
+        while(1) { /* gopast, line 101 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+            break;
+        lab1:
+            if (z->c >= z->l) goto lab0; /* ran out of text: give up */
+            z->c++;
+        }
+        z->I[0] = z->c; /* setmark pV, line 101 */
+        while(1) { /* gopast, line 101 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+            break;
+        lab2:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+            break;
+        lab3:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+            break;
+        lab4:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 102 */
+    lab0:
+        z->c = c; /* put the cursor back where it started */
+    }
+    return 1;
+}
+
+/* R2 test: returns 1 when the cursor is at or beyond the p2 mark kept in
+ * z->I[1], and 0 otherwise. */
+static int r_R2(struct SN_env * z) {
+    return (z->c >= z->I[1]) ? 1 : 0;
+}
+
+/* Remove an ending listed in a_0 from before the cursor.
+ * Endings with result 1 are removed only when the symbol in front of them
+ * is s_0 or s_1; endings with result 2 are removed unconditionally.
+ * Returns 1 when an ending was accepted, 0 otherwise. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c;                          /* [, line 111 */
+    hit = find_among_b(z, a_0, 9);          /* substring, line 111 */
+    if (hit == 0) return 0;
+    z->bra = z->c;                          /* ], line 111 */
+
+    if (hit == 1) {
+        int saved = z->l - z->c;            /* or, line 115 */
+        if (!eq_s_b(z, 1, s_0)) {
+            z->c = z->l - saved;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z);                       /* delete, line 115 */
+    } else if (hit == 2) {
+        slice_del(z);                       /* delete, line 122 */
+    }
+    return 1;
+}
+
+/* Remove an ending listed in a_1 from before the cursor.
+ * Returns 1 when an ending matched, 0 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c;                          /* [, line 127 */
+    hit = find_among_b(z, a_1, 26);         /* substring, line 127 */
+    if (hit == 0) return 0;
+    z->bra = z->c;                          /* ], line 127 */
+
+    if (hit == 1)
+        slice_del(z);                       /* delete, line 136 */
+    return 1;
+}
+
+/* Remove an adjective ending and then, optionally, an ending from a_2.
+ * Fails (returns 0) only when r_adjective() fails; the a_2 step is a "try"
+ * whose failure still yields 1.  In the a_2 step, result 1 endings need s_2
+ * or s_3 in front of them, result 2 endings are removed unconditionally. */
+static int r_adjectival(struct SN_env * z) {
+    int hit;
+    int before_try;
+
+    if (!r_adjective(z)) return 0;          /* call adjective, line 141 */
+
+    before_try = z->l - z->c;               /* try, line 148 */
+    z->ket = z->c;                          /* [, line 149 */
+    hit = find_among_b(z, a_2, 8);          /* substring, line 149 */
+    if (hit == 0) {
+        z->c = z->l - before_try;
+        return 1;
+    }
+    z->bra = z->c;                          /* ], line 149 */
+
+    if (hit == 1) {
+        int before_or = z->l - z->c;        /* or, line 154 */
+        if (!eq_s_b(z, 1, s_2)) {
+            z->c = z->l - before_or;
+            if (!eq_s_b(z, 1, s_3)) {
+                z->c = z->l - before_or;
+                return 1;
+            }
+        }
+        slice_del(z);                       /* delete, line 154 */
+    } else if (hit == 2) {
+        slice_del(z);                       /* delete, line 161 */
+    }
+    return 1;
+}
+
+/* Remove one of the two endings listed in a_3 from before the cursor.
+ * Returns 1 when an ending matched, 0 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c;                          /* [, line 168 */
+    hit = find_among_b(z, a_3, 2);          /* substring, line 168 */
+    if (hit == 0) return 0;
+    z->bra = z->c;                          /* ], line 168 */
+
+    if (hit == 1)
+        slice_del(z);                       /* delete, line 171 */
+    return 1;
+}
+
+/* Remove an ending listed in a_4 from before the cursor.
+ * Endings with result 1 are removed only when the symbol in front of them
+ * is s_4 or s_5; endings with result 2 are removed unconditionally.
+ * Returns 1 when an ending was accepted, 0 otherwise. */
+static int r_verb(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c;                          /* [, line 176 */
+    hit = find_among_b(z, a_4, 46);         /* substring, line 176 */
+    if (hit == 0) return 0;
+    z->bra = z->c;                          /* ], line 176 */
+
+    if (hit == 1) {
+        int saved = z->l - z->c;            /* or, line 182 */
+        if (!eq_s_b(z, 1, s_4)) {
+            z->c = z->l - saved;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z);                       /* delete, line 182 */
+    } else if (hit == 2) {
+        slice_del(z);                       /* delete, line 190 */
+    }
+    return 1;
+}
+
+/* Remove an ending listed in a_5 from before the cursor.
+ * Returns 1 when an ending matched, 0 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c;                          /* [, line 199 */
+    hit = find_among_b(z, a_5, 36);         /* substring, line 199 */
+    if (hit == 0) return 0;
+    z->bra = z->c;                          /* ], line 199 */
+
+    if (hit == 1)
+        slice_del(z);                       /* delete, line 206 */
+    return 1;
+}
+
+/* Remove an ending listed in a_6, provided the r_R2() test succeeds once
+ * the ending has been matched.  Returns 1 on removal, 0 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c;                          /* [, line 215 */
+    hit = find_among_b(z, a_6, 2);          /* substring, line 215 */
+    if (hit == 0) return 0;
+    z->bra = z->c;                          /* ], line 215 */
+
+    if (!r_R2(z)) return 0;                 /* call R2, line 215 */
+
+    if (hit == 1)
+        slice_del(z);                       /* delete, line 218 */
+    return 1;
+}
+
+/* Final clean-up driven by table a_7.
+ *   result 1: delete the ending, then delete one s_6 symbol if it is itself
+ *             preceded by s_7 (returns 0 if either symbol is missing);
+ *   result 2: delete the ending if it is preceded by s_8, else return 0;
+ *   result 3: delete the ending.
+ * Returns 0 when nothing in a_7 matches, 1 when an action completed. */
+static int r_tidy_up(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c;                          /* [, line 223 */
+    hit = find_among_b(z, a_7, 4);          /* substring, line 223 */
+    if (hit == 0) return 0;
+    z->bra = z->c;                          /* ], line 223 */
+
+    if (hit == 1) {
+        slice_del(z);                       /* delete, line 227 */
+        z->ket = z->c;                      /* [, line 228 */
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c;                      /* ], line 228 */
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z);                       /* delete, line 228 */
+    } else if (hit == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z);                       /* delete, line 231 */
+    } else if (hit == 3) {
+        slice_del(z);                       /* delete, line 233 */
+    }
+    return 1;
+}
+/* Entry point: stem the word held in z, in place.
+ * After r_mark_regions() the text is processed backwards from z->l with the
+ * backward limit z->lb temporarily moved to the mark I[0]:
+ *   1. perfective_gerund, or failing that an optional reflexive ending
+ *      followed by the first of adjectival / verb / noun that succeeds;
+ *   2. removal of one trailing s_9 symbol, if present;
+ *   3. derivational;
+ *   4. tidy_up.
+ * Steps 1, 3 and 4 restore the cursor afterwards whether or not they
+ * succeeded.  Returns 0 if the cursor lies before I[0], otherwise 1. */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        m3 = z->lb; z->lb = z->c; /* save the old backward limit; the mark becomes the limit */
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3; /* put back the saved backward limit */
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Allocate the working environment for the Russian stemmer.
+ * NOTE(review): the three arguments presumably size the environment's S, I
+ * and B arrays (this stemmer uses z->I[0] and z->I[1]) -- confirm against
+ * SN_create_env(). */
+extern struct SN_env * russian_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 0);
+    return env;
+}
+
+/* Release an environment obtained from russian_create_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+/* Create / release the SN_env environment used by the Russian stemmer. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+/* Stem the word currently held in z, in place. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+/* unless(C) stmt; runs stmt when C is false -- an inverted if. */
+#define unless(C) if(!(C))
+/* Capacity, in symbols, of a string freshly made by create_s(). */
+#define CREATE_SIZE 1
+
+/* Allocate a new symbol string with capacity CREATE_SIZE.
+ * The returned pointer addresses the first symbol; HEAD bytes of header
+ * (holding capacity and size) sit immediately in front of it.  Free the
+ * string with lose_s().
+ * Running out of memory is fatal: callers have no way to receive an error,
+ * so the failure is reported on stderr and the process exits, in the same
+ * way slice_check() handles an internal error.  Previously the malloc()
+ * result was used unchecked. */
+extern symbol * create_s(void)
+{
+    symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+
+    if (mem == NULL)
+    {
+        fprintf(stderr, "create_s: out of memory\n");
+        exit(1);
+    }
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Free a symbol string made by create_s() or increase_size(). */
+extern void lose_s(symbol * p)
+{
+    /* the allocation starts HEAD bytes in front of the first symbol */
+    char * base = (char *) p - HEAD;
+    free(base);
+}
+
+/* Forward grouping test.  Succeeds when the character at the cursor lies in
+ * min..max and its bit (ch - min) is set in the bitmap s; the cursor then
+ * advances by one and 1 is returned.  Otherwise returns 0 and leaves the
+ * cursor alone. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max || ch < min) return 0;
+    ch -= min;
+    if (!(s[ch >> 3] & (0X1 << (ch & 0X7)))) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward grouping test.  Succeeds when the character just before the
+ * cursor lies in min..max and its bit (ch - min) is set in the bitmap s; the
+ * cursor then steps back by one and 1 is returned.  Otherwise returns 0 and
+ * leaves the cursor alone. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max || ch < min) return 0;
+    ch -= min;
+    if (!(s[ch >> 3] & (0X1 << (ch & 0X7)))) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward "not in grouping" test.  Fails (0) at the end of the text or when
+ * the character at the cursor lies in min..max with its bit set in s.
+ * Otherwise advances the cursor by one and returns 1. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max && ch >= min)
+    {
+        ch -= min;
+        if (s[ch >> 3] & (0X1 << (ch & 0X7))) return 0;   /* it is a member */
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward "not in grouping" test.  Fails (0) at the backward limit or when
+ * the character just before the cursor lies in min..max with its bit set in
+ * s.  Otherwise steps the cursor back by one and returns 1. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max && ch >= min)
+    {
+        ch -= min;
+        if (s[ch >> 3] & (0X1 << (ch & 0X7))) return 0;   /* it is a member */
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward range test: if the character at the cursor lies in min..max,
+ * advance the cursor and return 1; otherwise return 0. */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    if (z->c < z->l)
+    {
+        int ch = z->p[z->c];
+        if (min <= ch && ch <= max)
+        {
+            z->c++;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Backward range test: if the character just before the cursor lies in
+ * min..max, step the cursor back and return 1; otherwise return 0. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    if (z->c > z->lb)
+    {
+        int ch = z->p[z->c - 1];
+        if (min <= ch && ch <= max)
+        {
+            z->c--;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Forward "outside range" test: if there is a character at the cursor and
+ * it lies outside min..max, advance the cursor and return 1; otherwise
+ * return 0. */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    if (z->c < z->l)
+    {
+        int ch = z->p[z->c];
+        if (ch < min || ch > max)
+        {
+            z->c++;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Backward "outside range" test: if there is a character before the cursor
+ * and it lies outside min..max, step the cursor back and return 1;
+ * otherwise return 0. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    if (z->c > z->lb)
+    {
+        int ch = z->p[z->c - 1];
+        if (ch < min || ch > max)
+        {
+            z->c--;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Compare the s_size symbols at s with the text starting at the cursor.
+ * On a match the cursor moves past the text and 1 is returned; otherwise
+ * 0 is returned and the cursor is unchanged. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    int room = z->l - z->c;     /* symbols left before the forward limit */
+
+    if (s_size > room) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Compare the s_size symbols at s with the text ending at the cursor.
+ * On a match the cursor moves back over the text and 1 is returned;
+ * otherwise 0 is returned and the cursor is unchanged. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    int room = z->c - z->lb;    /* symbols available before the cursor */
+
+    if (s_size > room) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* eq_s() for a whole symbol string: its length is read from SIZE(p). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int n = SIZE(p);
+    return eq_s(z, n, p);
+}
+
+/* eq_s_b() for a whole symbol string: its length is read from SIZE(p). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int n = SIZE(p);
+    return eq_s_b(z, n, p);
+}
+/* Look up the text starting at the cursor in the table v of v_size entries.
+ * The first loop is a binary search over v (which therefore has to be
+ * ordered -- NOTE(review): presumably guaranteed by the generator, confirm).
+ * common_i / common_j remember how many leading symbols are already known
+ * to match at the lower / upper bound, so each probe resumes comparing at
+ * the smaller of the two.
+ * The second loop starts at the entry the search settled on and follows
+ * the substring_i links.  The first entry that matched in full, and whose
+ * optional function (if any) returns non-zero, is accepted: the cursor is
+ * set just past the matched text and the entry's result is returned.
+ * Returns 0 when the chain ends without an accepted entry. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; } /* text exhausted: sorts before the key */
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c + w->s_size; /* the function may have moved the cursor */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply.
+ * The text is the run of symbols ending at the cursor, bounded by z->lb,
+ * and each key is compared from its last symbol towards its first.  On
+ * acceptance the cursor is set to the start of the matched text and the
+ * entry's result is returned; 0 is returned when no entry is accepted. */
+
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; } /* reached the backward limit */
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c - w->s_size; /* the function may have moved the cursor */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Return a copy of the symbol string p with capacity n + 20, and free p.
+ *
+ * Changes from the previous version:
+ *   - the malloc() result is checked; running out of memory is reported on
+ *     stderr and is fatal, as in slice_check(), because callers cannot
+ *     receive an error;
+ *   - the size field of the new string is carried over from p instead of
+ *     being left uninitialised;
+ *   - the number of symbols copied, and the size carried over, never exceed
+ *     the new capacity, so a small n cannot overrun the new buffer.
+ * The two allocations are distinct, so memcpy() is sufficient. */
+extern symbol * increase_size(symbol * p, int n)
+{
+    int new_size = n + 20;
+    int keep = CAPACITY(p) < new_size ? CAPACITY(p) : new_size;
+    int size = SIZE(p) < new_size ? SIZE(p) : new_size;
+    void * mem = malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    symbol * q;
+
+    if (mem == NULL)
+    {
+        fprintf(stderr, "increase_size: out of memory\n");
+        exit(1);
+    }
+    q = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(q) = new_size;
+    SET_SIZE(q, size);
+    memcpy(q, p, keep * sizeof(symbol));
+    lose_s(p);
+    return q;
+}
+
+/* Replace the symbols of z->p between c_bra and c_ket with the s_size
+ * symbols at s.  When the length changes, the buffer is grown if needed,
+ * the tail after c_ket is shifted, z->l and the stored size are updated,
+ * and the cursor is moved: by the same amount if it was at or after c_ket,
+ * or to c_bra if it was inside the replaced span.
+ * Returns the change in length (new minus old). */
+
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{
+    const int delta = s_size - (c_ket - c_bra);
+    const int old_len = SIZE(z->p);
+
+    if (delta != 0)
+    {
+        const int new_len = delta + old_len;
+
+        if (new_len > CAPACITY(z->p))
+            z->p = increase_size(z->p, new_len);
+
+        /* move everything after the replaced span to its new position */
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+
+        if (z->c >= c_ket)
+            z->c += delta;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0)
+        memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/* Sanity-check the current slice: 0 <= bra <= ket <= l <= SIZE(p) must
+ * hold.  On violation a message and a dump of the buffer are printed and
+ * the process exits with status 1. */
+static void slice_check(struct SN_env * z)
+{
+    if (z->bra < 0 ||
+        z->ket < z->bra ||
+        z->l < z->ket ||
+        SIZE(z->p) < z->l)      /* this last test could be removed */
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+/* Replace the current slice z->bra .. z->ket with the s_size symbols at s,
+ * after checking that the slice is valid. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice with the whole symbol string p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int n = SIZE(p);
+    slice_from_s(z, n, p);
+}
+
+/* Delete the current slice, i.e. replace it with nothing. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, (symbol *) 0);
+}
+
+/* Replace the symbols between bra and ket with the s_size symbols at s and
+ * shift z->bra / z->ket by the change in length when they are at or after
+ * bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* Replace the symbols between bra and ket with the whole symbol string p
+ * and shift z->bra / z->ket by the change in length when they are at or
+ * after bra. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    int n = SIZE(p);
+    int delta = replace_s(z, bra, ket, n, p);
+
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* Copy the current slice z->bra .. z->ket into p, growing p first when its
+ * capacity is too small.  Returns p, which may have been reallocated. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int n;
+
+    slice_check(z);
+    n = z->ket - z->bra;
+    if (n > CAPACITY(p))
+        p = increase_size(p, n);
+    memmove(p, z->p + z->bra, n * sizeof(symbol));
+    SET_SIZE(p, n);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into p, growing p first when its
+ * capacity is too small.  Returns p, which may have been reallocated. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    const int n = z->l;
+
+    if (n > CAPACITY(p))
+        p = increase_size(p, n);
+    memmove(p, z->p, n * sizeof(symbol));
+    SET_SIZE(p, n);
+    return p;
+}
+
+/* Dump the buffer of z to stdout with position markers: '{' at lb, '[' at
+ * bra, '|' at the cursor, ']' at ket and '}' at l.  Zero symbols are shown
+ * as '#'.  When number is non-negative the dump is prefixed with number,
+ * line_count and the buffer size. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{
+    const int limit = SIZE(z->p);
+    int pos;
+
+    if (number >= 0)
+        printf("%3d (line %4d): [%d]'", number, line_count, limit);
+
+    /* pos runs one past the last symbol so markers at the end are shown */
+    for (pos = 0; pos <= limit; pos++)
+    {
+        if (pos == z->lb)  printf("{");
+        if (pos == z->bra) printf("[");
+        if (pos == z->c)   printf("|");
+        if (pos == z->ket) printf("]");
+        if (pos == z->l)   printf("}");
+
+        if (pos < limit)
+        {
+            int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Convert a NUL-terminated string to lower case in place and return the
+ * same pointer.
+ */
+char*
+lowerstr(char *str) {
+   char *cur;
+
+   /* go through unsigned char so that bytes >= 0x80 reach tolower() as non-negative values */
+   for (cur = str; *cur != '\0'; cur++)
+       *cur = tolower(*(unsigned char*)cur);
+   return str;
+}
+
+/*
+ * Release a stop-word list: free each of the s->len words, free the word
+ * array itself, then zero the whole StopList (which also clears s->wordop).
+ * Safe to call on a list whose s->stop is NULL.
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       /*
+        * Check the remaining count before dereferencing: readstoplist()
+        * does not NULL-terminate the array, so *ptr must not be read once
+        * all s->len entries have been visited.
+        */
+       while( s->len >0 && *ptr ) {
+           free(*ptr);
+           ptr++; s->len--;
+       }
+       /*
+        * Free the array exactly once, after the words.  Doing it inside the
+        * loop released it on every iteration and left the loop walking
+        * freed memory.
+        */
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/* Free the first len words of a partially built stop-word array, then the array itself. */
+static void
+dropstopwords(char **stop, int len) {
+   while( len>0 )
+       free(stop[--len]);
+   free(stop); /* free(NULL) is a no-op */
+}
+
+/*
+ * Load a stop-word list from the file whose name is given by `in`.
+ *
+ * Each non-empty line becomes one entry; if s->wordop is set it is applied
+ * to every word as it is stored.  On return s->stop points at a malloc'ed
+ * array of s->len malloc'ed strings (NULL / 0 when `in` is NULL or empty).
+ * The array is neither sorted nor NULL-terminated.
+ *
+ * Raises ERROR when the file cannot be opened or memory runs out; in the
+ * out-of-memory case everything read so far is released first.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t linelen=strlen(buf);
+
+           /*
+            * Strip the newline only when one is present: the last line of
+            * a file may lack it, and unconditionally chopping the final
+            * byte would then eat a letter (or index buf[-1] on an empty
+            * buffer).
+            */
+           if ( linelen>0 && buf[linelen-1]=='\n' )
+               buf[linelen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /*
+                    * realloc() failure leaves the old array intact; it is
+                    * still only reachable through the local `stop`, so
+                    * release it here before resetting *s.
+                    */
+                   dropstopwords(stop, s->len);
+                   s->stop=NULL;
+                   freestoplist(s);
+                   fclose(hin); 
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+    
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               /* same cleanup as above: entries 0 .. s->len-1 are valid */
+               dropstopwords(stop, s->len);
+               s->stop=NULL;
+               freestoplist(s);
+               fclose(hin); 
+               elog(ERROR,"Not enough memory");
+           }
+           if ( s->wordop ) 
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++; 
+       }
+       fclose(hin);
+       pfree(filename); 
+   }
+   s->stop=stop;
+} 
+
+/*
+ * qsort()/bsearch() comparator for arrays of char*: each argument points at
+ * one array element, and the strings they refer to are ordered by strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   char *left = *(char**)a;
+   char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/* Sort the words of a stop list with comparestr(); does nothing for an empty list. */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Report whether key is in the stop list.  If s->wordop is set it is applied
+ * to key first.  The lookup is a bsearch() with comparestr(), so the list has
+ * to be in that order already (sortstoplist() produces it).
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop )
+       key=(s->wordop)(key);
+
+   /* empty list: nothing can match */
+   if ( !s->stop || s->len<=0 )
+       return false;
+
+   return ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) != NULL ) ? true : false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+/*
+ * Saved SPI plans.  Each starts out NULL and is filled in with
+ * SPI_saveplan(SPI_prepare(...)) the first time the function that needs it
+ * runs: plan_getcfg and plan_getmap in init_cfg(), plan_name2id in
+ * name2id_cfg(), plan_getcfg_bylocale in get_currcfg().
+ */
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+/* oid of the current configuration; 0 until set_curcfg() or get_currcfg() stores one */
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg for the configuration whose pg_ts_cfg row has oid `id`.
+ *
+ * Step 1: read the parser name (prs_name) of that configuration.
+ * Step 2: read, for every token type the parser reports, the array of
+ *         dictionary names mapped to it, and build cfg->map indexed by
+ *         token type id.  Names are kept temporarily in the dict_id slots.
+ * Step 3: after SPI_finish(), resolve the parser name and every dictionary
+ *         name to ids via name2id_prs() / name2id_dict().
+ *
+ * cfg->map and the dict_id arrays are malloc'ed.  Errors are reported with
+ * ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       /* copy the name out of SPI memory so it is still valid after SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second query takes (parser name, cfg oid): first argument becomes text */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       /* column 2 is a Datum holding a pointer, so convert Datum -> pointer */
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /*
+        * Rows arrive ordered by tokid descending, so the first row carries
+        * the largest token id and fixes the size of the map.
+        */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull here refers to the dict_name column fetched last */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id ) {
+           /* do not leave a non-zero length paired with a NULL array */
+           cfg->map[lexid].len=0;
+           ts_error(ERROR,"No memory");
+       }
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* dictionary names must outlive SPI_finish(); they are resolved below */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       /* free the detoasted copy, if detoasting made one */
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every stored dictionary name by the dictionary's oid */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Cache of loaded configurations.  `list` is a realloc'ed array kept sorted
+ * by configuration oid (see findcfg()), so it can be searched with bsearch().
+ */
+typedef struct {
+   /* entry returned by the most recent findcfg() call, or NULL */
+   TSCfgInfo   *last_cfg;
+   /* number of entries of `list` in use */
+   int     len;
+   /* number of entries `list` has room for */
+   int     reallen;
+   TSCfgInfo   *list;
+   /* configuration name -> oid map consulted and filled by name2id_cfg() */
+   SNMap       name2id_map;
+} CFGList;
+
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name -> oid map, every
+ * configuration's token-type map with its per-type dictionary arrays, and
+ * the list itself.  CList is left zeroed.
+ */
+void
+reset_cfg(void) {
+        freeSNMap( &(CList.name2id_map) );
+        if ( CList.list ) {
+       int i,j;
+       /* one pass per cached configuration ... */
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               /* ... and per token type of its map (map has .len entries) */
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+                free(CList.list);
+   }
+        memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by oid.
+ *
+ * Oids are unsigned, so they are compared explicitly: returning their
+ * difference as an int yields the wrong sign once two ids are more than
+ * INT_MAX apart, which would break the sort order bsearch() relies on.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((TSCfgInfo*)a)->id;
+   Oid idb = ((TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached TSCfgInfo for configuration oid `id`, loading it with
+ * init_cfg() on first use.  The returned pointer refers into CList.list and
+ * may be invalidated by a later findcfg() call that grows or re-sorts the
+ * list.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo *newcfg;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+   newcfg=&(CList.list[CList.len]);
+   /*
+    * Do not publish the slot through last_cfg before it is complete:
+    * init_cfg() stores the id early, so if it then exits through an error
+    * a later call with the same id would be handed a half-built entry.
+    */
+   CList.last_cfg=NULL;
+   init_cfg(id, newcfg);
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return findcfg(id); /* qsort changed the order, so look the entry up again */
+}
+
+
+/*
+ * Translate a configuration name into the oid of its pg_ts_cfg row.
+ * Results are remembered in CList.name2id_map; only a miss runs the query.
+ * Raises ERROR when no row matches or the oid comes back NULL.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid cfgid;
+
+   /* fast path: this name was resolved before */
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid )
+       return cfgid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !( SPI_processed > 0 ) )
+       elog(ERROR, "No tsearch config");
+
+   cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull )
+       elog(ERROR, "Null id for tsearch config");
+
+   SPI_finish();
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Tokenize buf (buflen bytes) with the parser of configuration cfg and
+ * append the normalized lexemes to prs->words.
+ *
+ * For each token the dictionaries mapped to its type are tried in order;
+ * the first one that returns a non-NULL result decides: every string it
+ * returned is stored (the strings themselves are kept, only the array is
+ * freed) and all of them share one position.  Token types beyond cfg->len
+ * are skipped.  Raises ERROR for a token of MAXSTRLEN bytes or more.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm) /* an integer length, not a pointer */
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token to prs->words: the entry is zeroed, then given the token
+ * type, its length and a palloc'ed copy of the buflen bytes at buf (no
+ * terminating NUL is added).  The array is doubled until there is room.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* take the address after any repalloc() so it refers to the current array */
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;
+}
+
+/*
+ * Link the most recently added word (prs->words[curwords-1]) to every query
+ * operand equal to the normalized form buf/buflen.
+ *
+ * The first matching operand is stored in the word itself.  For each further
+ * match a copy of the word is appended with `repeated` set and `item`
+ * pointing at that operand; the copy shares the original's `word` pointer.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* make room for the worst case of one extra entry per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc() above may have moved prs->words,
+    * and a pointer computed before the growth loop would be left dangling.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Tokenize buf (buflen bytes) for headline generation.
+ *
+ * Every token, whatever its type, is appended to prs->words with
+ * hladdword().  Tokens whose type has a dictionary mapping are additionally
+ * normalized: the first dictionary returning a non-NULL result wins, and
+ * each string it returned is matched against the query with hlfinditem()
+ * and then freed.  Raises ERROR for a token of MAXSTRLEN bytes or more.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       /* no dictionary mapping exists for token types beyond the map */
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm) /* an integer length, not a pointer */
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from the words collected in prs.
+ *
+ * Returns a palloc'ed text value.  Only words with `in` set and neither
+ * `skip` nor `repeated` set contribute: a word flagged `replace` is emitted
+ * as a single space, any other word is copied verbatim and, when `selected`,
+ * wrapped in prs->startsel / prs->stopsel.  The `word` buffer of every
+ * non-repeated entry is freed along the way, so prs->words must not be used
+ * for its strings afterwards.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   text *out;
+   int len=128;
+   char *ptr;
+   HLWORD  *wrd=prs->words;
+
+   out = (text*)palloc( len );
+   /* output bytes start right after the varlena header */
+   ptr=((char*)out) + VARHDRSZ;
+
+   while( wrd - prs->words < prs->curwords ) {
+       /* grow until this word plus both selection markers is guaranteed to fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
+           /* remember the write offset: repalloc() may move the buffer */
+           int dist = ptr - ((char*)out);
+           len*= 2;
+           out = (text *) repalloc(out, len);
+           ptr=((char*)out) + dist;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace ) {
+               *ptr=' ';
+               ptr++;
+           } else {
+               if (wrd->selected) {
+                   memcpy(ptr,prs->startsel,prs->startsellen);
+                   ptr+=prs->startsellen;
+               }
+               memcpy(ptr,wrd->word,wrd->len);
+               ptr+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(ptr,prs->stopsel,prs->stopsellen);
+                   ptr+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries are struct copies made by hlfinditem() and share the original's buffer */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+
+       wrd++;
+   }
+
+   /* total size = header plus everything written */
+   VARATT_SIZEP(out)=ptr - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the oid of the current configuration.  If none has been chosen yet
+ * it is looked up in pg_ts_cfg by the LC_CTYPE locale name and remembered in
+ * current_cfg_id.  Raises ERROR when no configuration matches the locale.
+ */
+int  
+get_currcfg(void) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1];
+   const char *locname;
+   bool isnull;
+   int rc;
+
+   /* already chosen, either by an earlier lookup or by set_curcfg() */
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( plan_getcfg_bylocale == NULL ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( plan_getcfg_bylocale == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* setlocale() with a NULL locale only queries the current setting */
+   locname = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)locname) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !( SPI_processed > 0 ) )
+       elog(ERROR,"Can't find tsearch config by locale");
+   current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current one.
+ * findcfg() is called first, so the configuration is loaded before the oid
+ * is stored.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current one.
+ * The name is resolved with name2id_cfg() and the work is delegated to
+ * set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Oid cfgid=name2id_cfg(name);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum(cfgid) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* SQL-callable: return the oid of the current configuration, as given by get_currcfg(). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=get_currcfg();
+
+   PG_RETURN_OID( cfgid );
+}
+
+/*
+ * SQL-callable: report "TSearch cache cleaned" at NOTICE level.
+ * NOTE(review): nothing here calls reset_cfg() or any other reset routine
+ * directly; presumably ts_error() flushes the caches as a side effect --
+ * confirm against its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries mapped to one token type, in the order they are tried. */
+typedef struct {
+   /* number of entries in dict_id */
+   int len;
+   /* dictionary oids stored as Datums (init_cfg() keeps the names here while loading) */
+   Datum   *dict_id;
+} ListDictionary;
+
+/* One loaded configuration, as filled in by init_cfg(). */
+typedef struct {
+   /* oid of the pg_ts_cfg row */
+   Oid id;
+   /* parser id obtained from name2id_prs() */
+   Oid prs_id;
+   /* number of entries in map (largest mapped token type id + 1) */
+   int len;
+   /* per-token-type dictionary lists, indexed by token type id */
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalized lexeme produced by parsetext_v2(). */
+typedef struct {
+        /* length of word in bytes (strlen of the lexeme) */
+        uint16          len;
+   union {
+       /* position of the lexeme, limited by LIMITPOS() */
+       uint16      pos;
+       /* NOTE(review): presumably an array of positions used once words are merged -- not set in ts_cfg.c, confirm */
+       uint16      *apos;
+   } pos;
+        char       *word;
+   /* set to 0 by parsetext_v2(); presumably the allocated size of pos.apos -- confirm */
+   uint32  alen;
+}       WORD;
+   
+/* Growable array of WORDs filled by parsetext_v2(). */
+typedef struct {
+        WORD       *words;
+        /* number of entries words has room for; doubled when full */
+        int4            lenwords;
+        /* number of entries of words in use */
+        int4            curwords;
+   /* running position counter, incremented once per token a dictionary accepts */
+   int4        pos;
+}       PRSTEXT;
+
+/* One token of the text being turned into a headline. */
+typedef struct {
+        /* length of word in bytes */
+        uint16    len;
+   /*
+    * Flags read by genhl(): the word is output only when `in` is set and
+    * `skip` and `repeated` are clear; `replace` outputs a single space in
+    * its place; `selected` wraps it in the start/stop markers.  `repeated`
+    * marks the extra copies made by hlfinditem().
+    */
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   /* token type reported by the parser */
+   uint8   type;
+        /* palloc'ed copy of the token text; not NUL-terminated */
+        char      *word;
+   /* query operand this word matched, or NULL */
+   ITEM      *item;
+}       HLWORD;
+   
+/* Growable array of HLWORDs plus the markers genhl() puts around selected words. */
+typedef struct {
+        HLWORD       *words;
+        /* number of entries words has room for; doubled when more are needed */
+        int4            lenwords;
+        /* number of entries of words in use */
+        int4            curwords;
+        /* text copied before / after each selected word, with explicit lengths */
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum tsstat_in(PG_FUNCTION_ARGS);
+
+/*
+ * tsstat_in: the call arguments are never read; every call yields a freshly
+ * palloc'd, header-only tsstat (len = STATHDRSIZE, size = 0).
+ */
+Datum
+tsstat_in(PG_FUNCTION_ARGS)
+{
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+/*
+ * tsstat_out: not implemented -- always raises ERROR "Unimplemented".
+ * The PG_RETURN_NULL() after elog(ERROR) only gives the function a return
+ * statement.
+ */
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * Create or grow an array of WordEntry pointers.
+ *
+ * A NULL array or a zero *len yields a fresh 8-slot array; otherwise the
+ * capacity stored in *len is doubled and the array is repalloc'd.
+ * Returns the (possibly relocated) array; *len receives the new capacity.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   bool    fresh = ( in==NULL || *len==0 );
+
+   *len = fresh ? 8 : (*len) * 2;
+
+   if ( fresh )
+       return palloc( sizeof(WordEntry*) * (*len) );
+   return repalloc( in, sizeof(WordEntry*) * (*len) );
+}
+
+/*
+ * Order a statistics entry against a tsvector word: shorter words sort
+ * first, words of equal length are ordered by strncmp() over their bytes.
+ * Returns <0, 0 or >0 in the usual comparator sense.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   const char  *statword;
+   const char  *txtword;
+
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   statword = STATSTRPTR(stat) + a->pos;
+   txtword = STRPTR(txt) + b->pos;
+   return strncmp( statword, txtword, a->len );
+}
+
+/*
+ * Build a new tsstat holding every entry of `stat' plus `len' new words
+ * taken from `txt'.
+ *
+ *   stat  - existing statistics; only read here, never freed
+ *   txt   - tsvector that owns the new words
+ *   entry - pointers to the WordEntry of each new word; the merge below
+ *           assumes they are in compareStatWord() order and that none of
+ *           them is already present in `stat'
+ *   len   - number of pointers in `entry'
+ *
+ * Returns a freshly palloc'd tsstat.  The old string area is copied
+ * unchanged and the text of the new words is appended after it, so the
+ * `pos' of every old entry stays valid.  Each new entry starts with ndoc = 1
+ * and nentry = POSDATALEN() of the word, or 1 when that is 0.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* string bytes contributed by the new words */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings first; new strings are appended starting at curptr */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary-search its slot, then copy around it */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       /* StopLow is now the first old entry that does not sort before the word */
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: linear merge of the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /*
+        * At most one of the two inputs still has items left: copy the old
+        * tail (a no-op when it is empty), then append the remaining new words.
+        */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+/*
+ * ts_accum(tsstat, tsvector): fold the words of one tsvector into the
+ * running statistics.
+ *
+ * Argument 0 is the current tsstat (NULL means "start empty"); argument 1
+ * is the tsvector to add.  Words already present get ndoc incremented and
+ * nentry increased by their position count (1 when there are none) directly
+ * in the passed-in tsstat.  Words not yet present are collected and handed
+ * to formstat(), which builds a new tsstat.
+ *
+ * Returns the passed-in tsstat when no new word was found (or argument 1 is
+ * NULL or empty), otherwise the new tsstat; the old one is not freed here.
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt;
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* a NULL tsvector must be rejected before any attempt to detoast it */
+   if ( PG_ARGISNULL(1) )
+       PG_RETURN_POINTER(stat);
+
+   txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+
+   /* nothing to add from an empty tsvector */
+   if ( txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: a linear merge of both ordered lists, or, when the
+    * statistics are at least 100 times larger than the tsvector, one binary
+    * search per word.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word sorts before the current entry: it is new */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* words left over after the statistics ran out are all new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* the bounds only meet when the loop ended without a match */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions in this file:
+ *   cur  - index of the next StatEntry to return
+ *   stat - private copy of the statistics being returned (a tsstat, which
+ *          is what ts_setup_firstcall() copies into it)
+ */
+typedef struct {
+   uint32  cur;
+   tsstat  *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions of this file.
+ *
+ * While funcctx->multi_call_memory_ctx is current, copies `stat' into a new
+ * StatStorage (cursor at 0), stores it in funcctx->user_fctx, and builds the
+ * slot and attribute metadata for rows of the "statinfo" type.  The
+ * previous memory context is restored before returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext   saved;
+   StatStorage     *storage;
+   TupleDesc       rowdesc;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* private copy of the statistics plus a cursor starting at entry 0 */
+   storage = palloc( sizeof(StatStorage) );
+   storage->cur = 0;
+   storage->stat = palloc( stat->len );
+   memcpy(storage->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)storage;
+
+   /* result rows are described by the "statinfo" row type */
+   rowdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+
+/*
+ * Produce the next result row, or (Datum)0 once every entry of the stored
+ * statistics has been returned.
+ *
+ * A row is built from three C strings: the word, its ndoc counter and its
+ * nentry counter.  When the entries are exhausted, the StatStorage and its
+ * copy of the statistics are freed.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *storage = (StatStorage*)funcctx->user_fctx;
+   StatEntry   *cur;
+   HeapTuple   htup;
+   Datum       row;
+   char        *fields[3];
+   char        ndocbuf[16];
+   char        nentrybuf[16];
+
+   /* exhausted: release the private state and signal end of set */
+   if ( !( storage->cur < storage->stat->size ) ) {
+       pfree(storage->stat);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   cur = STATPTR(storage->stat) + storage->cur;
+
+   /* field 0: the word, copied out as a NUL-terminated string */
+   fields[0] = palloc( cur->len+1 );
+   memcpy( fields[0], STATSTRPTR(storage->stat)+cur->pos, cur->len );
+   fields[0][cur->len] = '\0';
+
+   /* fields 1 and 2: the two counters rendered as decimal text */
+   sprintf(ndocbuf, "%d", cur->ndoc);
+   fields[1] = ndocbuf;
+   sprintf(nentrybuf, "%d", cur->nentry);
+   fields[2] = nentrybuf;
+
+   htup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   row = TupleGetDatum(funcctx->slot, htup);
+
+   pfree(fields[0]);
+   storage->cur++;
+   return row;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+/*
+ * ts_accum_finish(tsstat): set-returning function that returns the entries
+ * of the tsstat passed as argument 0, one row per call.
+ * The first call copies the tsstat into multi-call storage via
+ * ts_setup_firstcall(); every call then asks ts_process_call() for the next
+ * row and finishes the set when it returns (Datum)0.
+ */
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Oid of the tsvector type, looked up on first use by get_ti_Oid() */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the Oid of the type named 'tsvector' in pg_type through SPI and
+ * store it in tiOid.  The caller must already be connected to SPI.
+ * Raises ERROR when the query fails, returns no row, or yields InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is an unsigned row count, so the test must be for "no
+    * rows"; without it vals[0] below would be read from an empty result.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL text in `txt' through an SPI cursor and accumulate word
+ * statistics over its result.
+ *
+ * The query must return exactly one column, of type tsvector; anything else
+ * raises ERROR.  Rows are fetched 100 at a time and every non-NULL value is
+ * folded in with ts_accum().  The caller must already be connected to SPI.
+ *
+ * Returns the accumulated tsstat (header-only when the query returns no
+ * non-NULL value).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from empty, header-only statistics */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum() hands back `stat' itself when nothing new was added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+/*
+ * ts_stat(text): set-returning function.  Argument 0 is the text of an SQL
+ * query; on the first call it is executed through SPI by ts_stat_sql() and
+ * the resulting statistics are copied into multi-call storage by
+ * ts_setup_firstcall() before SPI_finish() is called.  Every call then
+ * returns the next row from ts_process_call() until it yields (Datum)0.
+ */
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* NOTE(review): the result of SPI_connect() is not checked */
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       /* copy the statistics out before SPI_finish() */
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one word:
+ *   len    - length of the word in bytes
+ *   pos    - offset of the word's bytes from STATSTRPTR() of the owning tsstat
+ *   ndoc   - counter incremented once for each accumulated tsvector that
+ *            contains the word
+ *   nentry - sum over those tsvectors of the word's position count (a
+ *            tsvector without position data counts as 1)
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Accumulated statistics, stored as one contiguous chunk:
+ *   len  - total size of the chunk in bytes
+ *   size - number of StatEntry items
+ *   data - the StatEntry array, followed by the bytes of all words
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat.  `x' is a pointer to a tsstat; the header
+ * fields are read through tsstat itself, and every macro argument is
+ * parenthesised so expressions can be passed safely.
+ */
+/* size of the fixed header: the len and size fields */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* total bytes needed for x entries plus lenstr bytes of word text */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* first StatEntry, right after the header */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* start of the word text, right after the StatEntry array */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* number of bytes of word text */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, plus an array giving each lexeme's position in that string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * fmgr V1 registration records and prototypes for the SQL-callable
+ * functions defined in this file.
+ */
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort comparator for WordEntryPos elements: ascending by position.
+ *
+ * Returns 0 when both positions are equal.  The comparator must be
+ * consistent (cmp(a,b) and cmp(b,a) may not both claim "greater"),
+ * otherwise qsort's behaviour is undefined.  Weights are deliberately not
+ * compared; uniquePos() merges the weights of equal positions afterwards.
+ */
+static int
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ *
+ * Entries sharing a position collapse into one that keeps the largest
+ * weight seen.  Compaction stops once MAXNUMPOS entries are kept or the
+ * kept position reaches MAXENTRYPOS-1.  Returns the number of entries
+ * retained at the front of a[].
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   int4 last = 0;
+   int4 i;
+
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (i = 1; i < l; i++) {
+       if ( a[i].pos == a[last].pos ) {
+           /* same position: remember only the heavier weight */
+           if ( a[i].weight > a[last].weight )
+               a[last].weight = a[i].weight;
+           continue;
+       }
+
+       last++;
+       a[last].pos = a[i].pos;
+       a[last].weight = a[i].weight;
+       if ( last >= MAXNUMPOS-1 || a[last].pos == MAXENTRYPOS-1 )
+           break;
+   }
+   return last + 1;
+}
+
+/*
+ * Base address of the lexeme text that compareentry() indexes into.
+ * qsort() has no user-data argument, so uniqueentry() stores the buffer
+ * here right before sorting.
+ */
+static char *BufferStr;
+
+/*
+ * qsort comparator for WordEntryIN: shorter lexemes sort first; lexemes of
+ * equal length are ordered by strncmp() over their text in BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *ea = (const WordEntryIN *) a;
+   const WordEntryIN *eb = (const WordEntryIN *) b;
+
+   if ( ea->entry.len != eb->entry.len )
+       return ( ea->entry.len > eb->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[ea->entry.pos],
+                  &BufferStr[eb->entry.pos],
+                  ea->entry.len);
+}
+
+/*
+ * Bytes one finished entry occupies in the tsvector string area: the
+ * SHORTALIGN'ed lexeme text plus, when positions are present, the position
+ * array including its leading uint16 counter slot (pos[0]).  The position
+ * list is sorted and de-duplicated as a side effect.
+ */
+static int4
+finishentry(WordEntryIN * e)
+{
+   int4 sz = SHORTALIGN(e->entry.len);
+
+   if ( e->entry.haspos ) {
+       *(uint16*)(e->pos) = uniquePos( &(e->pos[1]), *(uint16*)(e->pos));
+       sz += (*(uint16*)(e->pos) + 1) * sizeof(WordEntryPos);
+   }
+   return sz;
+}
+
+/*
+ * Sort the l parsed entries of a[] (lexeme text lives in buf) and merge
+ * duplicate lexemes in place, concatenating their position lists.
+ *
+ * On return *outbuflen holds exactly the number of string-area bytes
+ * tsvector_in() will write for the surviving entries.  The incoming value
+ * of *outbuflen is ignored: it used to be added to, which left the caller's
+ * scratch-buffer capacity in the total and omitted the per-entry counter
+ * slot, so the result was usually too large and could be too small for
+ * many very short lexemes with positions.
+ *
+ * Returns the number of distinct entries left at the front of a[].
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   *outbuflen = 0;
+   if (l <= 0)
+       return 0;
+
+   res = a;
+   if (l > 1) {
+       ptr = a + 1;
+       BufferStr = buf;
+       qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+       while (ptr - a < l)
+       {
+           if (!(ptr->entry.len == res->entry.len &&
+                 strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+           {
+               /* new lexeme: close the previous one and start the next slot */
+               *outbuflen += finishentry(res);
+               res++;
+               if ( res != ptr )   /* memcpy must not be given identical regions */
+                   memcpy(res,ptr,sizeof(WordEntryIN));
+           } else if ( ptr->entry.haspos ){
+               if ( res->entry.haspos ) {
+                   /* append ptr's positions behind res's; slot 0 is the counter */
+                   int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+                   res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+                   memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]),
+                       &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+                   *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+                   pfree( ptr->pos );
+               } else {
+                   /* res had no positions yet: adopt ptr's array as is */
+                   res->entry.haspos=1;
+                   res->pos = ptr->pos;
+               }
+           }
+           ptr++;
+       }
+   }
+
+   /* close the last (or only) entry */
+   *outbuflen += finishentry(res);
+
+   return res + 1 - a;
+}
+
+/*
+ * States of the gettoken_tsvector() state machine:
+ *   WAITWORD      - skipping blanks, looking for the start of a lexeme
+ *   WAITENDWORD   - inside an unquoted lexeme
+ *   WAITNEXTCHAR  - a backslash was seen; the next character is taken literally
+ *   WAITENDCMPLX  - inside a single-quoted lexeme
+ *   WAITPOSINFO   - closing quote seen; a ':' starts position info
+ *   INPOSINFO     - expecting the digits of a position
+ *   WAITPOSDELIM  - after a position: ',', a weight letter, or end of entry
+ */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Double the size of state->word when the write cursor is within one byte
+ * of its end, then re-point state->curpos into the reallocated buffer.
+ * Expects a TI_IN_STATE pointer named "state" in the calling scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Extract the next lexeme (and optional position list) from state->prsbuf.
+ *
+ * The lexeme is written NUL-terminated into state->word (grown on demand);
+ * state->curpos is left on the terminator, so curpos - word is its length.
+ * If position info was parsed, state->alen is non-zero and state->pos is a
+ * palloc'ed array whose slot 0 holds the count (as uint16) followed by the
+ * positions; otherwise state->alen is 0.  When state->oprisdelim is set,
+ * operator characters end a lexeme and position info is not parsed.
+ *
+ * Returns 1 when a lexeme was produced and 0 at end of input; malformed
+ * input is reported with elog(ERROR).
+ *
+ * Input characters are cast to unsigned char before being handed to the
+ * <ctype.h> classifiers: passing a negative plain char is undefined.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               /* first character of an unquoted lexeme */
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               /* escaped character: copy verbatim, resume the previous state */
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               /* delimiter is left unconsumed for the next call */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1;
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* consume the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   /* first position: allocate the array, slot 0 is the counter */
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2;
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               /* remaining digits of the current number are skipped here */
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function for tsvector: parse the cstring argument into lexemes
+ * (with optional position lists), sort and de-duplicate them, and build the
+ * packed value: header, WordEntry array, then the string area holding each
+ * lexeme's text followed by its position data.
+ *
+ * Offsets are stored in WordEntry.pos, a 20-bit field, so any offset equal
+ * to or above MAXSTRPOS is rejected; this applies both to the scratch
+ * buffer and to the final string area, which also carries position data.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       /* make room in the scratch text buffer for this lexeme */
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           /* take ownership of the position array built by the tokenizer */
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* empty value: no string area at all */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       if (cur - STRPTR(in) >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* counter slot plus the positions themselves */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos );
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * SQL-callable: number of entries (the size field) of a tsvector.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before the detoasted copy may be released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function for tsvector: render every entry as a single-quoted
+ * lexeme, optionally followed by ":pos[weight],pos[weight],...", with
+ * entries separated by one space.
+ *
+ * Both the quote and the backslash are emitted with a preceding backslash.
+ * gettoken_tsvector() treats a backslash as an escape character, so a
+ * lexeme containing one would otherwise not read back as the same value.
+ *
+ * Buffer sizing: 2 quotes per entry, size-1 separators, the terminator,
+ * 2 bytes per lexeme byte (worst case: every byte escaped) and 7 bytes per
+ * position (up to 5 digits, a weight letter, and a ',' - the ':' takes the
+ * slot of the comma the last position does not need).  With that reserve no
+ * reallocation is needed while writing.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += ptr[i].len*2 /*for escape */;
+       if ( ptr[i].haspos )
+           lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           /* escape the characters the input parser treats specially */
+           if (*curin == '\'' || *curin == '\\')
+               *curout++ = '\\';
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               /* weight 0 is the default and is not printed */
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0:
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort comparator for WORD: by length, then by strncmp() of the text,
+ * then by ascending position.
+ *
+ * Two elements with the same text and the same position compare equal
+ * (0).  Answering -1 for both argument orders, as before, gives qsort an
+ * inconsistent ordering, which the C standard leaves undefined.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+
+   if (wa->len == wb->len) {
+       int res = strncmp(wa->word, wb->word, wb->len);
+
+       if ( res==0 ) {
+           if ( wa->pos.pos == wb->pos.pos )
+               return 0;
+           return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+       }
+       return res;
+   }
+   return (wa->len > wb->len) ? 1 : -1;
+}
+
+/*
+ * Sort the l parsed words and merge equal ones in place.
+ *
+ * Every surviving word gets a palloc'ed apos[] array: apos[0] is the number
+ * of positions, followed by the (LIMITPOS-clamped) positions.  The text of
+ * each merged duplicate is pfree'd.  Positions stop being collected once
+ * MAXNUMPOS-1 are stored or the last one equals MAXENTRYPOS-1.
+ * Returns the number of distinct words left at the front of a[].
+ *
+ * NOTE(review): pos.pos is always read into tmppos before pos.apos of the
+ * same element is assigned; the two members appear to share storage -
+ * confirm against the WORD declaration before reordering.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   int4 last = 0;
+   int4 i;
+   int tmppos;
+
+   /* a single word needs no sorting */
+   if (l != 1)
+       qsort((void *) a, l, sizeof(WORD), compareWORD);
+
+   /* the first word always survives: give it a one-element position list */
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   for (i = 1; i < l; i++) {
+       WORD *cur = &a[i];
+       WORD *keep = &a[last];
+
+       if (cur->len == keep->len &&
+           strncmp(cur->word, keep->word, keep->len) == 0)
+       {
+           /* duplicate: drop its text, keep its position if there is room */
+           uint16 npos;
+
+           pfree(cur->word);
+           npos = keep->pos.apos[0];
+           if ( npos >= MAXNUMPOS-1 || keep->pos.apos[npos] == MAXENTRYPOS-1 )
+               continue;
+           if ( npos+1 >= keep->alen ) {
+               keep->alen*=2;
+               keep->pos.apos=(uint16*)repalloc( keep->pos.apos, sizeof(uint16)*keep->alen );
+           }
+           keep->pos.apos[ npos+1 ] = LIMITPOS(cur->pos.pos);
+           keep->pos.apos[0]++;
+       } else {
+           /* new word: move it into the next output slot */
+           last++;
+           keep = &a[last];
+           keep->len = cur->len;
+           keep->word = cur->word;
+           tmppos=LIMITPOS(cur->pos.pos);
+           keep->alen=2;
+           keep->pos.apos=(uint16*)palloc( sizeof(uint16)*keep->alen );
+           keep->pos.apos[0]=1;
+           keep->pos.apos[1]=tmppos;
+       }
+   }
+
+   return last + 1;
+}
+
+/*
+ * make value of tsvector
+ */
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are sorted and de-duplicated first, then packed as: header,
+ * WordEntry array, and a string area holding each lexeme's SHORTALIGN'ed
+ * text followed by a uint16 count and that many WordEntryPos (weight 0).
+ * All memory owned by prs->words (word text, position arrays and the array
+ * itself) is released.
+ *
+ * WordEntry.pos is a 20-bit field, so an offset equal to MAXSTRPOS already
+ * does not fit; the limit check therefore uses >=.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+
+           ptr->haspos=1;
+           /* counter first, then the positions right behind it */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * SQL-callable: to_tsvector(cfg id, text).
+ * Parses the text with the configuration found for the given id and packs
+ * the resulting words into a tsvector; text yielding no words produces an
+ * empty tsvector (size 0).
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *txt = PG_GETARG_TEXT_P(1);
+   PRSTEXT     prs;
+   tsvector   *result = NULL;
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+   PG_FREE_IF_COPY(txt, 1);
+
+   if (prs.curwords == 0) {
+       /* nothing indexable: hand back a header-only value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   } else
+       result = makevalue(&prs);
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * SQL-callable: to_tsvector(cfg name, text).
+ * Resolves the configuration name to its id and delegates to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text   *cfgname = PG_GETARG_TEXT_P(0);
+   Datum   cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum   vector;
+
+   vector = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(vector);
+}
+
+/*
+ * SQL-callable: to_tsvector(text).
+ * Delegates to to_tsvector() with the id returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum   cfgid = Int32GetDatum( get_currcfg() );
+   Datum   vector;
+
+   vector = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(vector);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * text.  Returns the oid of the first such candidate, or InvalidOid when
+ * there is none.  The candidate list returned by FuncnameGetCandidates()
+ * is freed element by element.
+ */
+static Oid
+findFunc(char *fname) {
+   List               *qualname = makeList1(makeString(fname));
+   FuncCandidateList   cand = FuncnameGetCandidates(qualname, 1);
+   Oid                 found = InvalidOid;
+
+   freeList(qualname);
+
+   while (cand != NULL) {
+       FuncCandidateList next = cand->next;
+
+       /* only the first text-argument candidate is remembered */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ */
+/*
+ * Row-level BEFORE INSERT/UPDATE trigger that fills a tsvector column.
+ *
+ * Trigger arguments: tgargs[0] names the tsvector column to set; every
+ * further argument is either the name of a character column to index or,
+ * if no such column exists, the name of a function taking text.  Once a
+ * function name has been seen, its result is what gets parsed for all
+ * columns listed after it.
+ *
+ * Returns the modified tuple; all failures are raised with elog(ERROR).
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the function before parsing */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * If detoasting made no copy, record txt as the reference value
+            * so that the pfree test after parsing leaves it alone.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when it is a copy distinct from the reference value */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a header-only (size 0) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One lexeme of a tsvector, packed into 32 bits:
+ *   haspos - set when position data follows the lexeme text
+ *   len    - length of the lexeme text in bytes
+ *   pos    - offset of the lexeme text from the start of the string area
+ * MAXSTRLEN and MAXSTRPOS are the first values that no longer fit into
+ * the len and pos fields.
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme: a 2-bit weight (0..3; the text form uses the
+ * letters D, C, B, A for 0, 1, 2, 3) and a 14-bit position.
+ * MAXENTRYPOS is the first position value that does not fit, MAXNUMPOS the
+ * cap on positions kept per lexeme, and LIMITPOS clamps a position to the
+ * largest representable value, MAXENTRYPOS-1.
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * Packed tsvector value.
+ *   len  - total size of the value in bytes
+ *          (NOTE(review): presumably doubles as the varlena length word - confirm)
+ *   size - number of WordEntry elements
+ *   data - the WordEntry array, immediately followed by the string area
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Layout helpers for a packed tsvector.  Every macro argument is
+ * parenthesized so that expressions such as "a->size + b->size" or "p + 1"
+ * can be passed safely.
+ *
+ *   DATAHDRSIZE       size of the len/size header
+ *   CALCDATASIZE(x,l) total bytes for x entries plus l bytes of string area
+ *   ARRPTR(x)         start of the WordEntry array
+ *   STRPTR(x)         start of the string area (depends on x->size)
+ *   STRSIZE(x)        size of the string area in bytes
+ *   _POSDATAPTR(x,e)  the uint16 position count stored after lexeme e
+ *   POSDATALEN(x,e)   number of positions of e (0 if e has none)
+ *   POSDATAPTR(x,e)   first WordEntryPos of e
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Working form of an entry while parsing tsvector input: the packed entry
+ * plus, when entry.haspos is set, a palloc'ed position array whose slot 0
+ * is used as a uint16 element count.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * State of the tokenizer gettoken_tsvector():
+ *   prsbuf     - read cursor in the input string
+ *   word       - buffer receiving the current lexeme
+ *   curpos     - write cursor inside word
+ *   len        - allocated size of word
+ *   state      - current state of the state machine
+ *   alen       - allocated elements of pos; 0 when the lexeme has no positions
+ *   pos        - position array of the current lexeme (slot 0 is the count)
+ *   oprisdelim - when true, operator characters terminate a lexeme
+ */
+typedef struct
+{
+   char       *prsbuf;
+   char       *word;
+   char       *curpos;
+   int4        len;
+   int4        state;
+   int4        alen;
+   WordEntryPos    *pos;
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+/* Fetch the next lexeme from state->prsbuf; returns 1 on success, 0 at end of input. */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * fmgr V1 registration records and prototypes for the SQL-callable
+ * functions defined in this file.
+ */
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * SQL-callable: return a copy of the tsvector without any position data.
+ * Only the lexeme texts are copied, and every output entry has haspos = 0.
+ *
+ * Both loops iterate over all in->size entries (the loop conditions were
+ * garbled to "isize" in the source text and are restored here).
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* string area needed for the lexeme texts alone */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * SQL-callable: return a copy of the tsvector in which every position
+ * carries the weight named by the second argument ('a'..'d', either case,
+ * mapped to 3..0).  Any other letter raises "Unknown weight".
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   WordEntry *entry;
+   int idx, k, npos;
+   int w=0;
+
+   switch(tolower(cw)) {
+       case 'd': w=0; break;
+       case 'c': w=1; break;
+       case 'b': w=2; break;
+       case 'a': w=3; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   /* work on a private copy so the argument is left untouched */
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+
+   entry=ARRPTR(out);
+   for (idx = 0; idx < out->size; idx++, entry++) {
+       WordEntryPos *p;
+
+       npos = POSDATALEN(out,entry);
+       if ( npos == 0 )
+           continue;
+       p = POSDATAPTR(out,entry);
+       for (k = 0; k < npos; k++)
+           p[k].weight=w;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two lexemes that live in (possibly different) string areas:
+ * shorter sorts first, equal lengths are ordered by strncmp() of the text.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (in src) to the position list of destptr
+ * (in dest), shifting each by maxpos and clamping it with LIMITPOS.
+ * If destptr has no positions yet its counter is initialised to 0 first;
+ * destptr->haspos is set when at least one position was added.
+ * Returns the number of positions appended.
+ *
+ * NOTE(review): the loop condition was truncated in the source text
+ * ("for(i=0; i").  It is reconstructed here from the limits applied
+ * elsewhere in this patch (at most MAXNUMPOS positions per lexeme, nothing
+ * after a position of MAXENTRYPOS-1) - confirm against the upstream file.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos )
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight;
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1;
+   return  *clen - startlen;
+}
+
+
+/*
+ * SQL-callable: concatenate two tsvectors.
+ *
+ * The entry arrays of both inputs are merged in compareEntry() order.
+ * Positions coming from the second argument are shifted by the largest
+ * position found in the first one; a lexeme present in both inputs gets
+ * the first argument's positions followed by the shifted ones.
+ * The result is allocated with the combined size of both inputs and
+ * trimmed to the space actually used at the end.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   /* provisional size: STRPTR(out) below is computed from it */
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   /* merge while both inputs still have entries */
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* in1's positions are copied unchanged, counter included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* in2's positions are appended shifted by maxpos */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: emit it once */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the counter is already in place, so only the added positions advance cur */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* remaining entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* remaining entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Shrink to the entries actually emitted; that moves STRPTR(out), so
+    * the string area is slid down to its final place.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+-- Uninstall script: drops the operator class, operators, aggregate,
+-- tables, types and functions listed below.
+-- All statements run inside one transaction (BEGIN ... END).
+BEGIN;
+
+-- WARNING: this script also drops all indexes, triggers and columns
+-- that use the types defined in tsearch2.sql (see the CASCADE drops
+-- below).
+
+
+-- GiST operator class; CASCADE also removes objects that depend on it.
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- Operators.
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- Aggregate.
+DROP AGGREGATE stat(tsvector);
+
+-- Tables.
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- Types; CASCADE removes everything that depends on each type.
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- Functions dropped explicitly.
+-- NOTE(review): presumably these are the functions not already removed by
+-- the DROP TYPE ... CASCADE statements above; verify against tsearch2.sql.
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the token
+ * type number from deflex.h (1..LASTNUM).  Index 0 is an empty placeholder
+ * because token type numbers start at 1.
+ */
+const char *lex_descr[]={
+   "",                                     /*  0: unused */
+   "Latin word",                           /*  1: LATWORD */
+   "Non-latin word",                       /*  2: CYRWORD */
+   "Word",                                 /*  3: UWORD */
+   "Email",                                /*  4: EMAIL */
+   "URL",                                  /*  5: FURL */
+   "Host",                                 /*  6: HOST */
+   "Scientific notation",                  /*  7: SCIENTIFIC */
+   "VERSION",                              /*  8: VERSIONNUMBER */
+   "Part of hyphenated word",              /*  9: PARTHYPHENWORD */
+   "Non-latin part of hyphenated word",    /* 10: CYRPARTHYPHENWORD */
+   "Latin part of hyphenated word",        /* 11: LATPARTHYPHENWORD */
+   "Space symbols",                        /* 12: SPACE */
+   "HTML Tag",                             /* 13: TAG */
+   "HTTP head",                            /* 14: HTTP */
+   "Hyphenated word",                      /* 15: HYPHENWORD */
+   "Latin hyphenated word",                /* 16: LATHYPHENWORD */
+   "Non-latin hyphenated word",            /* 17: CYRHYPHENWORD */
+   "URI",                                  /* 18: URI */
+   "File or path name",                    /* 19: FILEPATH */
+   "Decimal notation",                     /* 20: DECIMAL */
+   "Signed integer",                       /* 21: SIGNEDINT */
+   "Unsigned integer",                     /* 22: UNSIGNEDINT */
+   "HTML Entity"                           /* 23: HTMLENTITY */
+};
+
+/*
+ * Short alias of every token type, indexed by the token type number from
+ * deflex.h (1..LASTNUM).  Must stay in step with lex_descr[]: both arrays
+ * are read with the same index.  Index 0 is an empty placeholder.
+ */
+const char *tok_alias[]={
+   "",                 /*  0: unused */
+   "lword",            /*  1: LATWORD */
+   "nlword",           /*  2: CYRWORD */
+   "word",             /*  3: UWORD */
+   "email",            /*  4: EMAIL */
+   "url",              /*  5: FURL */
+   "host",             /*  6: HOST */
+   "sfloat",           /*  7: SCIENTIFIC */
+   "version",          /*  8: VERSIONNUMBER */
+   "part_hword",       /*  9: PARTHYPHENWORD */
+   "nlpart_hword",     /* 10: CYRPARTHYPHENWORD */
+   "lpart_hword",      /* 11: LATPARTHYPHENWORD */
+   "blank",            /* 12: SPACE */
+   "tag",              /* 13: TAG */
+   "http",             /* 14: HTTP */
+   "hword",            /* 15: HYPHENWORD */
+   "lhword",           /* 16: LATHYPHENWORD */
+   "nlhword",          /* 17: CYRHYPHENWORD */
+   "uri",              /* 18: URI */
+   "file",             /* 19: FILEPATH */
+   "float",            /* 20: DECIMAL */
+   "int",              /* 21: SIGNEDINT */
+   "uint",             /* 22: UNSIGNEDINT */
+   "entity"            /* 23: HTMLENTITY */
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+/*
+ * Token type numbers returned by the default word parser, plus the tables
+ * that describe them.
+ */
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Highest token type number.  Remember to update it whenever a token type
+ * is added below (and to extend lex_descr[] / tok_alias[] accordingly).
+ */
+#define LASTNUM        23
+
+/* Token type numbers; each one is also the index into lex_descr[] and tok_alias[] */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* Defined in deflex.c */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+/*
+ * Interface to the flex-generated word scanner (wordparser/parser.l).
+ */
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/* FILE is needed by the start_parse_fh() prototype below */
+#include <stdio.h>
+
+/*
+ * Text of the token most recently produced by tsearch2_yylex().
+ * Only declared here; the definition (with initializer) is in parser.l.
+ */
+extern char       *token;
+
+/*
+ * Length in bytes of that token.
+ * NOTE(review): this is a tentative definition in a header, so every file
+ * including parser.h emits its own copy and the link relies on common
+ * symbols.  parser.l has no separate definition of tokenlen, so it cannot
+ * be turned into an extern declaration from this header alone.
+ */
+int            tokenlen;
+
+/* Scan the next token and return its type; callers stop when it returns 0 */
+int            tsearch2_yylex(void);
+/* Start scanning the given number of bytes of an in-memory string */
+void       start_parse_str(char *, int);
+/* Start scanning a file handle; the int limits the bytes read (0 = no limit) */
+void       start_parse_fh(FILE *, int);
+/* Release the scanner buffer and any saved compound token */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+/*
+ * C prologue of the tsearch2 word scanner: includes, redirection of the
+ * C library calls used by the generated scanner to PostgreSQL equivalents,
+ * scanner globals and a custom YY_INPUT.
+ */
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: three-argument fprintf() calls in
+ * the generated code are turned into ts_error(ERROR, ...); the stream
+ * argument is dropped.
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* Use the PostgreSQL allocation functions instead of the C library ones */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the text of the current token */
+char *s     = NULL;  /* saved copy used to return a WHOLE compound token (hyphenated word, URL) */
+
+YY_BUFFER_STATE buf = NULL; /* current scan buffer; released by end_parse() */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the file handle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the current read from the file handle */
+
+/*
+ * Replacement for flex's YY_INPUT that honours the lrlimit read limit.
+ *
+ * Interactive buffers: read character by character up to max_size, a
+ * newline or EOF.
+ * Other buffers: if lrlimit is 0 report end of input (YY_NULL); if it is
+ * positive read at most min(lrlimit, max_size) bytes and decrease lrlimit
+ * by that amount; if it is negative read max_size bytes.
+ *
+ * Note that the fread() "if" is NOT part of the "else" above it, despite
+ * its indentation: it runs for both the limited and the unlimited case.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+/* Scanner options: 8-bit input, non-interactive, no unput(), no yywrap() */
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* exclusive state entered while splitting a hyphenated word into its parts */
+%x DELIM  
+/* exclusive states entered while parsing a URL */
+%x URL  
+%x SERVER  
+
+/* exclusive states used while skipping HTML tags, quoted attribute values, comments and scripts */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/*
+ * Character classes.  Every byte with the high bit set (\200-\377) is
+ * treated as a non-Latin letter (e.g. a Cyrillic KOI8 character).
+ */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+/* dot-separated labels ending in an alphabetic label, and the path/query part of a URL */
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+   /*
+    * HTML handling.  A script, comment or tag switches the scanner into an
+    * exclusive state in which every character is discarded until the
+    * matching terminator; the terminator is then reported as one token
+    * (SPACE for scripts and comments, TAG for tags) whose text has been
+    * overwritten with a blank.
+    *
+    * NOTE(review): the start-condition prefixes and the "</...>" and
+    * "<!--" patterns were restored from the declared states and the
+    * actions; confirm against the upstream parser.l.
+    */
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+"<!--" { BEGIN INCOMMENT; }
+
+<INCOMMENT>"-->"   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"</"[[:alpha:]]    { BEGIN INTAG; }
+
+   /* a double quote inside a tag opens a quoted attribute value */
+<INTAG>"\""    { BEGIN QINTAG; }
+
+   /* a backslash-escaped quote does not close the quoted value */
+<QINTAG>"\\\"" ;
+
+<QINTAG>"\""   { BEGIN INTAG; }
+
+<INTAG>">" { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+   /* discard everything else while inside a tag, quoted value, comment or script */
+<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n  ;
+
+   /*
+    * Token rules.  Each action publishes the matched text through the
+    * globals token/tokenlen and returns a token type from deflex.h.
+    *
+    * NOTE(review): DELIM, URL and SERVER are declared as exclusive states
+    * and are entered with BEGIN below, yet no rule in this part carries a
+    * start-condition prefix.  The prefixes appear to have been lost: the
+    * repeated VERSIONNUMBER/DECIMAL rules, the "one word in composite-word"
+    * rules and the "return in basic state" catch-all only make sense inside
+    * those states.  Restore them from the upstream parser.l before use.
+    */
+
+   /* named and numeric HTML character entities */
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+   /* numbers: scientific notation, dotted version, decimal, signed and unsigned integer */
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+   /* a protocol prefix is reported as HTTP and switches to the URL state */
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+   /*
+    * Host followed by a path: return a saved copy of the whole text as
+    * FURL, then push the input back (yyless(0)) so that it is scanned
+    * again in the SERVER state.
+    */
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+   /*
+    * Hyphenated words: return a saved copy of the whole word first, then
+    * push the input back (yyless(0)) so that its parts are scanned again
+    * in the DELIM state.
+    */
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+   /* NOTE(review): the rules from here to the catch-all presumably belong to the DELIM state - confirm */
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+   /* anything else: go back to the INITIAL state and rescan the same input there */
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+   /* plain words */
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+   /* whitespace, and any single character not matched above, is reported as SPACE */
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: release the saved copy of a compound token,
+ * if there is one, and destroy the current scan buffer.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Begin scanning "limit" bytes of the in-memory string "str".
+ * A parse that is still open is closed first.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL ) {
+       end_parse();
+   }
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Begin scanning from the file handle "fh".  "limit" is the maximum number
+ * of bytes to read; 0 means no limit (stored as -1 in lrlimit).
+ * A parse that is still open is closed first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL ) {
+       end_parse();
+   }
+
+   if ( limit != 0 )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+/* Saved SPI plan for the pg_ts_parser lookup in init_prs(); prepared on first use */
+static void *plan_getparser=NULL;
+/* Parser used by the *_current functions; InvalidOid until one has been chosen */
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser stored in the pg_ts_parser
+ * row whose oid is "id".
+ *
+ * *prs is zeroed first.  The row's prs_start, prs_nexttoken, prs_end and
+ * prs_headline columns are function OIDs; each is resolved into the
+ * matching FmgrInfo (allocated in TopMemoryContext).  prs_lextype is kept
+ * as a plain OID.  Reports an error through ts_error(ERROR, ...) when the
+ * plan cannot be prepared, the query fails or no such row exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       /* column 1: prs_start */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       /* column 2: prs_nexttoken */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       /* column 3: prs_end */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       /* column 4: prs_lextype, kept as an OID and called by OID later */
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       /* column 5: prs_headline */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned: print it with %u so large OIDs do not show up negative */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/*
+ * Cache of parser descriptions that have already been loaded, kept sorted
+ * by prs_id so that findprs() can use bsearch().
+ */
+typedef struct {
+   WParserInfo *last_prs;  /* entry returned by the last successful lookup, or NULL */
+   int     len;            /* number of entries in use */
+   int     reallen;        /* number of entries allocated */
+   WParserInfo *list;      /* the entries, sorted with compareprs() */
+   SNMap       name2id_map;    /* parser name -> parser OID, filled by name2id_prs() */
+} PrsList;
+
+/* The cache itself; starts out empty */
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Empty the parser cache: free the name map and the entry array, then
+ * return PList to its all-zero initial state.
+ */
+void
+reset_prs(void) {
+   WParserInfo *cached;
+
+   freeSNMap( &(PList.name2id_map) );
+
+   cached = PList.list;
+   if ( cached != NULL )
+       free(cached);
+
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is unsigned,
+ * so the difference of two ids converted to int has the wrong sign when
+ * the ids are more than INT_MAX apart.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   if ( ida > idb )
+       return 1;
+   return 0;
+}
+
+/*
+ * Return the cached description of the parser with OID "id", loading it
+ * with init_prs() on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search of the
+ * sorted cache, then a load into a new slot (growing the array if needed)
+ * followed by a re-sort.
+ *
+ * The returned pointer refers into PList.list and may be invalidated by a
+ * later call that grows or re-sorts the array.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo key;
+   WParserInfo *slot;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   key.prs_id=id;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /*
+    * last chance: load the parser.  last_prs stays NULL until the new entry
+    * is complete, so that an error raised below (ts_error(ERROR, ...) is
+    * expected not to return) cannot leave it pointing at a zeroed,
+    * uncounted slot or at memory moved by realloc().
+    */
+   PList.last_prs = NULL;
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+   slot=&(PList.list[PList.len]);
+   init_prs(id, slot);
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+
+   /* qsort changed order!! look the new entry up at its final position */
+   PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return PList.last_prs;
+}
+
+/* Saved SPI plan for the name lookup in name2id_prs(); prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the OID of its pg_ts_parser row.
+ *
+ * The name map in PList is consulted first; on a miss the table is queried
+ * through SPI and the result is added to the map.  Reports an error
+ * through ts_error(ERROR, ...) when no parser has that name.
+ */
+Oid
+name2id_prs(text *name) {
+   Oid     argtypes[1];
+   Datum   values[1];
+   bool    isnull;
+   int     rc;
+   Oid     prsid;
+
+   /* fast path: name already known */
+   prsid = findSNMap_t( &(PList.name2id_map), name );
+   if ( prsid != InvalidOid )
+       return prsid;
+
+   argtypes[0] = TEXTOID;
+   values[0] = PointerGetDatum(name);
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       void *plan = SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes );
+
+       plan_name2id = SPI_saveplan( plan );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed == 0 )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       prsid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(PList.name2id_map), name, prsid );
+   return prsid;
+}
+
+
+/******sql-level interface******/
+/* Per-query state of the token_type* set-returning functions */
+typedef struct {
+   int     cur;        /* index of the next entry of list to return */
+   LexDescr    *list;  /* token type descriptions; the entry with lexid 0 ends the list */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type* functions: obtain the list of
+ * token types from parser "prsid" and prepare the state needed to return
+ * it row by row as "tokentype" tuples.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   TypeStorage     *st;
+   WParserInfo *prs = findprs(prsid); 
+
+   /* everything allocated below has to survive until the last row is returned */
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->cur=0;
+   /* call the parser's lextype function; it returns the LexDescr array */
+   st->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
+   );
+   funcctx->user_fctx = (void*)st;
+   /* rows are built from C strings according to the "tokentype" row type */
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker of the token_type* functions.
+ *
+ * Returns the next token type description as a tuple datum and frees its
+ * alias and description strings.  When the list is exhausted (or missing)
+ * the state is released and (Datum)0 is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *st = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char        *values[3];
+   char        txtid[16];
+   HeapTuple   tuple;
+   Datum       result;
+
+   /* end of the list: release the state and tell the caller we are done */
+   if ( st->list == NULL || st->list[st->cur].lexid == 0 ) {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = &(st->list[st->cur]);
+
+   /* the tuple is built from strings, so the id is rendered as text */
+   sprintf(txtid,"%d",entry->lexid);
+   values[0] = txtid;
+   values[1] = entry->alias;
+   values[2] = entry->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[1]);
+   pfree(values[2]);
+   st->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: list the token types of the parser whose OID is
+ * given as argument 0, one row per call.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *fctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       Oid prsid = PG_GETARG_OID(0);
+
+       fctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(fctx, prsid);
+   }
+
+   fctx = SRF_PERCALL_SETUP();
+
+   row = process_call(fctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(fctx);
+   }
+   SRF_RETURN_NEXT(fctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: list the token types of the parser whose name is
+ * given as argument 0, one row per call.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *fctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0);
+       Oid prsid;
+
+       fctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( prsname );
+       setup_firstcall(fctx, prsid);
+       PG_FREE_IF_COPY(prsname,0);
+   }
+
+   fctx = SRF_PERCALL_SETUP();
+
+   row = process_call(fctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(fctx);
+   }
+   SRF_RETURN_NEXT(fctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: list the token types of the current parser, one
+ * row per call.  If no parser has been selected yet, the parser named
+ * "default" becomes the current one.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *fctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       fctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       setup_firstcall(fctx, current_parser_id);
+   }
+
+   fctx = SRF_PERCALL_SETUP();
+
+   row = process_call(fctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(fctx);
+   }
+   SRF_RETURN_NEXT(fctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+/*
+ * Make the parser whose OID is argument 0 the current parser.
+ * findprs() is called first so that the parser is loaded into the cache
+ * before the current id is changed.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        Oid prsid = PG_GETARG_OID(0);
+
+        findprs(prsid);
+        current_parser_id = prsid;
+        PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+/*
+ * Make the parser whose name is argument 0 the current parser: resolve the
+ * name to an OID and delegate to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *prsname = PG_GETARG_TEXT_P(0);
+        Oid prsid = name2id_prs(prsname);
+
+        DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+
+        PG_FREE_IF_COPY(prsname, 0);
+        PG_RETURN_VOID();
+}
+
+/* One token produced by a parser: its type number and a NUL-terminated copy of its text */
+typedef struct {
+   int type;
+   char    *lexem;
+} LexemEntry;
+
+/* Per-query state of the parse* set-returning functions */
+typedef struct {
+   int cur;            /* while filling: entries stored so far; afterwards: next entry to return */
+   int len;            /* while filling: allocated entries; afterwards: number of stored entries */
+   LexemEntry  *list;  /* the tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by parse(), parse_byname() and parse_current():
+ * run parser "prsid" over the whole of "txt", collect every token it
+ * returns and prepare the state needed to return them row by row as
+ * "tokenout" tuples.
+ *
+ * prsid is an Oid, like the argument of findprs() and the values passed by
+ * all callers; declaring it as int forced an Oid -> int -> Oid round trip.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   /* everything allocated below has to survive until the last row is returned */
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text payload (varlena header excluded) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* fetch tokens until the parser returns type 0 */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* double the array when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* lex is not NUL-terminated: keep a terminated private copy */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* switch from filling to reading: len = number of tokens, cur = next to return */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker of the parse* functions.
+ *
+ * Returns the next collected token as a tuple datum and frees its text.
+ * When all tokens have been returned the state is released and (Datum)0
+ * is returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *st = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char        *values[2];
+   char        tid[16];
+   HeapTuple   tuple;
+   Datum       result;
+
+   /* nothing left: release the state and tell the caller we are done */
+   if ( st->cur >= st->len ) {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = &(st->list[st->cur]);
+
+   /* the tuple is built from strings, so the type number is rendered as text */
+   sprintf(tid,"%d",entry->type);
+   values[0] = tid;
+   values[1] = entry->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[1]);
+   st->cur++;
+   return result;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose OID is argument 0 and return the tokens, one row per call.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *fctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(1);
+       Oid prsid = PG_GETARG_OID(0);
+
+       fctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(fctx, prsid, input);
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   fctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(fctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(fctx);
+   }
+   SRF_RETURN_NEXT(fctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose name is argument 0 and return the tokens, one row per call.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *fctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0);
+       text *input = PG_GETARG_TEXT_P(1);
+       Oid prsid;
+
+       fctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( prsname );
+       prs_setup_firstcall(fctx, prsid, input);
+       PG_FREE_IF_COPY(prsname,0);
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   fctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(fctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(fctx);
+   }
+   SRF_RETURN_NEXT(fctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 0 with the current
+ * parser and return the tokens, one row per call.  If no parser has been
+ * selected yet, the parser named "default" becomes the current one.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *fctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(0);
+
+       fctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       prs_setup_firstcall(fctx, current_parser_id, input);
+       PG_FREE_IF_COPY(input,0);
+   }
+
+   fctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(fctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(fctx);
+   }
+   SRF_RETURN_NEXT(fctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * Build a headline for a document.
+ *
+ * Arguments: 0 - OID of the configuration, 1 - document text, 2 - query,
+ * 3 - optional options text (may be absent or a NULL pointer).
+ * The document is split into words with hlparsetext(), the headline
+ * function of the configuration's parser is called on the result, and
+ * genhl() produces the returned text.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   /* the *_byname / *_current wrappers pass a NULL pointer when there are no options */
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   /* start with room for 32 words */
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   /* let the parser's headline function process the parsed words */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /* NOTE(review): startsel/stopsel are presumably allocated by the headline function - confirm they are always set */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+/*
+ * headline() for a configuration given by name (argument 0).  Arguments 1
+ * and 2 are passed through; argument 3 is passed through when present,
+ * otherwise a NULL pointer is passed in its place.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfgname = PG_GETARG_TEXT_P(0);
+   Datum cfgid = ObjectIdGetDatum(name2id_cfg( cfgname ) );
+   Datum options;
+   Datum result;
+
+   if ( PG_NARGS()>3 )
+       options = PG_GETARG_DATUM(3);
+   else
+       options = PointerGetDatum(NULL);
+
+   result = DirectFunctionCall4(
+       headline,
+       cfgid,
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       options
+   );
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+/*
+ * headline() for the current configuration.  Arguments 0 and 1 are passed
+ * through; argument 2 is passed through when present, otherwise a NULL
+ * pointer is passed in its place.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum cfgid = ObjectIdGetDatum(get_currcfg());
+   Datum options;
+   Datum result;
+
+   if ( PG_NARGS()>2 )
+       options = PG_GETARG_DATUM(2);
+   else
+       options = PointerGetDatum(NULL);
+
+   result = DirectFunctionCall4(
+       headline,
+       cfgid,
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       options
+   );
+
+   PG_RETURN_DATUM(result);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+/*
+ * Parser descriptions and the cache functions that manage them.
+ */
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* Description of one parser, loaded from its pg_ts_parser row by init_prs() */
+typedef struct {
+   Oid prs_id;                 /* oid of the pg_ts_parser row */
+   FmgrInfo start_info;        /* prs_start function */
+   FmgrInfo getlexeme_info;    /* prs_nexttoken function */
+   FmgrInfo end_info;          /* prs_end function */
+   FmgrInfo headline_info;     /* prs_headline function */
+   Oid lextype;                /* OID of the prs_lextype function */
+   void *prs;                  /* value returned by the start function for the parse in progress */
+} WParserInfo;
+
+/* Load the description of parser "id" into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* Return the cached description of parser "id", loading it if necessary */
+WParserInfo* findprs(Oid id);
+/* Translate a parser name into its OID */
+Oid name2id_prs(text *name);
+/* Empty the parser cache */
+void   reset_prs(void);
+
+
+/* One token type as reported by a parser's lextype function; lexid 0 ends the array */
+typedef struct {
+   int lexid;
+   char    *alias;
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_lextype: describe the token types of the default parser.
+ *
+ * Returns a palloc'd array of LASTNUM+1 LexDescr entries.  Entry k-1
+ * describes token type k, with copies of tok_alias[k] and lex_descr[k].
+ * The final entry only has lexid set to 0 and serves as the end marker;
+ * its alias and descr fields are left unset.
+ */
+Datum
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *table = (LexDescr*) palloc(sizeof(LexDescr) * (LASTNUM + 1));
+   int tokid = 1;
+
+   while ( tokid <= LASTNUM ) {
+       LexDescr *slot = &table[tokid - 1];
+
+       slot->lexid = tokid;
+       slot->alias = pstrdup(tok_alias[tokid]);
+       slot->descr = pstrdup(lex_descr[tokid]);
+       tokid++;
+   }
+
+   /* end-of-list marker */
+   table[LASTNUM].lexid = 0;
+
+   PG_RETURN_POINTER(table);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_start: begin parsing a buffer with the default parser.
+ *
+ * Argument 0 is a pointer to the text and argument 1 an int32; both are
+ * handed to start_parse_str().  Always returns a NULL pointer.
+ */
+Datum
+prsd_start(PG_FUNCTION_ARGS) {
+   char *buf = (char*) PG_GETARG_POINTER(0);
+   int32 buflen = PG_GETARG_INT32(1);
+
+   start_parse_str(buf, buflen);
+
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_getlexeme: fetch the next token from the default parser.
+ *
+ * Runs the lexer once (tsearch2_yylex()), then stores the lexer's token
+ * and tokenlen variables through the pointers passed as arguments 1 and 2.
+ * Argument 0 is not used.  Returns the value produced by the lexer as int32.
+ */
+Datum
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tokptr = (char**) PG_GETARG_POINTER(1);
+   int *toklen = (int*) PG_GETARG_POINTER(2);
+   int lextype;
+
+   /* the lexer must run before token / tokenlen are read */
+   lextype = tsearch2_yylex();
+
+   *tokptr = token;
+   *toklen = tokenlen;
+
+   PG_RETURN_INT32(lextype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+/*
+ * prsd_end: finish a parse started with prsd_start() by calling
+ * end_parse().  The parser-state argument is not used; returns void.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse();
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class predicates over the numeric token type ids of the default
+ * parser.  The tsearch2 guide lists id 12 as 'blank' (space symbols) and
+ * id 22 as 'uint'.
+ * NOTE(review): the meaning of the other ids is defined by the lexer
+ * tables (tok_alias / lex_descr) -- confirm there before changing any list.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+/*
+ * Used by prsd_headline():
+ *   HLIDIGNORE   - token is marked 'replace' inside the chosen fragment
+ *   NONWORDTOKEN - token is not counted as a word when measuring length
+ *   NOENDTOKEN   - token is not accepted as the last token of a headline
+ */
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* Window of headline words examined by checkcondition_HL(). */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * checkcondition_HL: callback passed to TS_execute() by hlCover().
+ *
+ * checkval points to an hlCheck window.  Returns true when any word in
+ * that window refers to the query item val, false otherwise.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window = (hlCheck*) checkval;
+   int idx = 0;
+
+   while ( idx < window->len ) {
+       if ( window->words[idx].item == val )
+           return true;
+       idx++;
+   }
+
+   return false;
+}
+
+
+/*
+ * hlCover: find the next cover, i.e. a span of words that satisfies the
+ * query, starting the search at word index *p.
+ *
+ * On entry *p is the first word index to consider.  On success *p and *q
+ * are the first and last word index of the cover and true is returned.
+ * Returns false when no further cover exists.
+ *
+ * Pass 1 finds, for every VAL item of the query, its first occurrence at
+ * or after the start position; *q becomes the largest of those indexes.
+ * Pass 2 walks backwards from *q and makes *p the smallest index of the
+ * last occurrence of each item.  The span [*p,*q] is then checked with
+ * TS_execute(); if the query is not satisfied, the search is retried one
+ * word further on.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+
+   /*
+    * -1 means "nothing found yet".  Using 0 here would make a match at
+    * word index 0 indistinguishable from no match at all.
+    */
+   *q=-1;
+   *p=0x7fffffff;
+
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q)
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q<0 )
+       return false;
+
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) {
+           return true;
+       } else {
+           /* span does not satisfy the query: retry one word later */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_headline: choose the headline fragment for the default parser.
+ *
+ * Arguments: 0 - HLPRSTEXT* with the parsed document words,
+ *            1 - text* option string, or NULL for defaults,
+ *            2 - QUERYTYPE* query.
+ *
+ * Recognised options (keys compared case-insensitively): MaxWords,
+ * MinWords, ShortWord, StartSel, StopSel.  Defaults are MaxWords=35,
+ * MinWords=15, ShortWord=3, StartSel=<b>, StopSel=</b>.  When options are
+ * given, an error is raised unless 0 < MinWords < MaxWords and
+ * ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is stretched or shrunk towards the
+ * word limits, and the candidate with the most distinct query words that
+ * does not end on a short or non-word token is kept.  If there is no
+ * cover at all, the first MinWords words are used.  Words of the chosen
+ * range get their in / selected / skip / replace flags set in place, and
+ * the same prs pointer is returned.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults; opt may override these and the start and end tags */
+   int min_words=15;
+   int max_words=35;
+   int shortword=3;
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) )
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++;
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) {
+           /* a better candidate was already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               /* word q itself was already counted by the loop above */
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) )
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
+                   continue;
+               if ( curlen>=min_words )
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) )
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this candidate if it is the first one, if it has more query
+        * words and a good end, or if it has a good end while the current
+        * best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) &&
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       }
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: fall back to the first min_words words */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) )
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* documented defaults for the highlight markers */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom}
+>100₂₂♦_{12
+>feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')

+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and

+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+}
+>feet₁.₁₂
+
+Each word has been assigned type 1;
+each space (represented here by a diamond) and the period, type 12;
+and the number one hundred, type 22.
+We can retrieve the alias for each type
+through the token_type function:
+
+
+=# select * from token_type('default')
+     where tokid = 1 or tokid = 12 or tokid = 22
+ tokid | alias |      descr       
+-------+-------+------------------
+     1 | lword | Latin word
+    12 | blank | Space symbols
+    22 | uint  | Unsigned integer
+(3 rows)
+
+
+
+
+Next, the tokens are assigned to dictionaries
+by looking up their type aliases in pg_ts_cfgmap
+to determine which dictionary should process each token.
+Since we are using the 'default' configuration:
+
+
+=# select * from pg_ts_cfgmap where ts_name = 'default' and
+      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
+ ts_name | tok_alias | dict_name 
+---------+-----------+-----------
+ default | lword     | {en_stem}
+ default | uint      | {simple}
+(2 rows)
+
+
+Since this map provides no dictionary for blank tokens,
+the spaces and period are simply discarded,
+leaving nine tokens,
+which are then numbered by their position:
+
+The¹
+walls²
+extend³
+upward⁴
+for⁵
+well⁶
+over⁷
+100⁸
+feet⁹
+
+
+Finally, the words are reduced to lexemes by their respective dictionaries.
+The 100 is submitted to the simple dictionary,
+which returns tokens unaltered except for making them lowercase:
+
+
+=# select lexize('simple', '100')
+ lexize 
+--------
+ {100}
+(1 row)
+
+
+The other words are submitted to en_stem
+which reduces each English word to a linguistic stem,
+and then discards stems which belong to its list of stop words;
+you can see the list of stop words
+in the file whose path is in the dict_initoption field
+of the pg_ts_dict table entry for en_stem.
+The first three words of our text illustrate respectively
+an en_stem stop word,
+a word which en_stem alters by stemming,
+and a word which en_stem leaves alone:
+
+
+=# select lexize('en_stem', 'The')
+ lexize 
+--------
+ {}
+(1 row)
+=# select lexize('en_stem', 'walls')
+ lexize 
+--------
+ {wall}
+(1 row)
+=# select lexize('en_stem', 'extend')
+  lexize  
+----------
+ {extend}
+(1 row)
+
+
+Once en_stem is done discarding stop words and stemming the rest,
+we are left with:
+
+wall²
+extend³
+upward⁴
+well⁶
+100⁸
+feet⁹
+
+Which is precisely the result of the example that began this section.
+
+Query words are stemmed by the to_tsquery() function
+using the same scheme to determine the dictionary for each token,
+with the difference that the query parser recognizes as special
+the boolean operators that separate query words.
+
+
+
+
+
diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html

new file mode 100644 (file)

index 0000000..df0faa4
--- /dev/null
+++ b/contrib/tsearch2/docs/tsearch2-ref.html
@@ -0,0 +1,448 @@
+
+
+
+
+tsearch2 reference
+
+
+The tsearch2 Reference
+
+
+Brandon Craig Rhodes
30 June 2003
+
+This Reference documents the user types and functions
+of the tsearch2 module for PostgreSQL.
+An introduction to the module is provided
+by the tsearch2 Guide,
+a companion document to this one.
+You can retrieve a beta copy of the tsearch2 module from the
+GiST for PostgreSQL
+page — look under the section entitled Development History
+for the current version.
+
+Vectors and Queries
+
+Vectors and queries both store lexemes,
+but for different purposes.
+A tsvector stores the lexemes
+of the words that are parsed out of a document,
+and can also remember the position of each word.
+A tsquery specifies a boolean condition among lexemes.
+
+Any of the following functions with a configuration argument
+can use either an integer id or textual ts_name
+to select a configuration;
+if the option is omitted, then the current configuration is used.
+For more information on the current configuration,
+read the next section on Configurations.
+
+Vector Operations
+
+
+
+ to_tsvector( [configuration,]
+ document TEXT) RETURNS tsvector
+
+ Parses a document into tokens,
+ reduces the tokens to lexemes,
+ and returns a tsvector which lists the lexemes
+ together with their positions in the document.
+ For the best description of this process,
+ see the section on Parsing and Stemming
+ in the accompanying tsearch2 Guide.
+
+ strip(vector tsvector) RETURNS tsvector
+
+ Return a vector which lists the same lexemes
+ as the given vector,
+ but which lacks any information
+ about where in the document each lexeme appeared.
+ While the returned vector is thus useless for relevance ranking,
+ it will usually be much smaller.
+
+ setweight(vector tsvector, letter) RETURNS tsvector
+
+ This function returns a copy of the input vector
+ in which every location has been labelled
+ with either the letter
+ 'A', 'B', or 'C',
+ or the default label 'D'
+ (which is the default with which new vectors are created,
+ and as such is usually not displayed).
+ These labels are retained when vectors are concatenated,
+ allowing words from different parts of a document
+ to be weighted differently by ranking functions.
+
+ vector1 || vector2
+
+ concat(vector1 tsvector, vector2 tsvector)
+ RETURNS tsvector
+
+ Returns a vector which combines the lexemes and position information
+ in the two vectors given as arguments.
+ Position weight labels (described in the previous paragraph)
+ are retained intact during the concatenation.
+ This has at least two uses.
+ First,
+ if some sections of your document
+ need to be parsed with different configurations than others,
+ you can parse them separately
+ and concatenate the resulting vectors into one.
+ Second,
+ you can weight words from some sections of your document
+ more heavily than those from others by:
+ parsing the sections into separate vectors;
+ assigning the vectors different position labels
+ with the setweight() function;
+ concatenating them into a single vector;
+ and then providing a weights argument
+ to the rank() function
+ that assigns different weights to positions with different labels.
+
+ tsvector_size(vector tsvector) RETURNS INT4
+
+ Returns the number of lexemes stored in the vector.
+
+ text::tsvector RETURNS tsvector
+
+ Directly casting text to a tsvector
+ allows you to directly inject lexemes into a vector,
+ with whatever positions and position weights you choose to specify.
+ The text should be formatted
+ like the vector would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Query Operations
+
+
+
+ to_tsquery( [configuration,]
+ querytext text) RETURNS tsquery
+
+ Parses a query,
+ which should be single words separated by the boolean operators
+ “&” and,
+ “|” or,
+ and “!” not,
+ which can be grouped using parentheses.
+ Each word is reduced to a lexeme using the current
+ or specified configuration.
+
+
+ querytree(query tsquery) RETURNS text
+
+ This might return a textual representation of the given query.
+
+ text::tsquery RETURNS tsquery
+
+ Directly casting text to a tsquery
+ allows you to directly inject lexemes into a query,
+ with whatever positions and position weight flags you choose to specify.
+ The text should be formatted
+ like the query would be printed by the output of a SELECT.
+ See the Casting
+ section in the Guide for details.
+
+
+Configurations
+
+A configuration specifies all of the equipment necessary
+to transform a document into a tsvector:
+the parser that breaks its text into tokens,
+and the dictionaries which then transform each token into a lexeme.
+Every call to to_tsvector() (described above)
+uses a configuration to perform its processing.
+Three configurations come with tsearch2:
+
+
+default — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the simple dictionary for all others.
+default_russian — Indexes words and numbers,
+ using the en_stem English Snowball stemmer for Latin-alphabet words
+ and the ru_stem Russian Snowball dictionary for all others.
+simple — Processes both words and numbers
+ with the simple dictionary,
+ which neither discards any stop words nor alters them.
+
+
+The tsearch2 module initially chooses your current configuration
+by looking for your current locale in the locale field
+of the pg_ts_cfg table described below.
+You can manipulate the current configuration yourself with these functions:
+
+
+
+ set_curcfg( id INT | ts_name TEXT
+  ) RETURNS VOID
+
+ Set the current configuration used by to_tsvector
+ and to_tsquery.
+
+ show_curcfg() RETURNS INT4
+
+ Returns the integer id of the current configuration.
+
+
+
+Each configuration is defined by a record in the pg_ts_cfg table:
+
+create table pg_ts_cfg (
+   id      int not  null primary key,
+   ts_name     text not null,
+   prs_name    text not null,
+   locale      text
+);
+
+The id and ts_name are unique values
+which identify the configuration;
+the prs_name specifies which parser the configuration uses.
+Once this parser has split document text into tokens,
+the type of each resulting token —
+or, more specifically, the type's lex_alias
+as specified in the parser's lexem_type() table —
+is searched for together with the configuration's ts_name
+in the pg_ts_cfgmap table:
+
+create table pg_ts_cfgmap (
+   ts_name     text not null,
+   lex_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,lex_alias)
+);
+
+Those tokens whose types are not listed are discarded.
+The remaining tokens are assigned integer positions,
+starting with 1 for the first token in the document,
+and turned into lexemes with the help of the dictionaries
+whose names are given in the dict_name array for their type.
+These dictionaries are tried in order,
+stopping either with the first one to return a lexeme for the token,
+or discarding the token if no dictionary returns a lexeme for it.
+
+Parsers
+
+Each parser is defined by a record in the pg_ts_parser table:
+
+create table pg_ts_parser (
+   prs_id      int not null primary key,
+   prs_name    text not null,
+   prs_start   oid not null,
+   prs_getlexem    oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+);
+
+The prs_id and prs_name uniquely identify the parser,
+while prs_comment usually describes its name and version
+for the reference of users.
+The other items identify the low-level functions
+which make the parser operate,
+and are only of interest to someone writing a parser of their own.
+
+The tsearch2 module comes with one parser named default
+which is suitable for parsing most plain text and HTML documents.
+
+Each parser argument below
+must designate a parser with either an integer prs_id
+or a textual prs_name;
+the current parser is used when this argument is omitted.
+
+
+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID
+
+ Selects a current parser
+ which will be used when any of the following functions
+ are called without a parser as an argument.
+
+ CREATE FUNCTION lexem_type(
+  [ parser ]
+  ) RETURNS SETOF lexemtype
+
+ Returns a table which defines and describes
+ each kind of token the parser may produce as output.
+ For each token type the table gives the lexid
+ with which the parser will label each token of that type,
+ the alias which names the token type,
+ and a short description descr for the user to read.
+
+ CREATE FUNCTION parse(
+  [ parser, ] document TEXT
+  ) RETURNS SETOF lexemtype
+
+ Parses the given document and returns a series of records,
+ one for each token produced by parsing.
+ Each token includes a lexid giving its type
+ and a lexem which gives its content.
+
+
+Dictionaries
+
+Dictionaries take textual tokens as input,
+usually those produced by a parser,
+and return lexemes which are usually some reduced form of the token.
+Among the dictionaries which come installed with tsearch2 are:
+
+
+simple simply folds uppercase letters to lowercase
+ before returning the word.
+en_stem runs an English Snowball stemmer on each word
+ that attempts to reduce the various forms of a verb or noun
+ to a single recognizable form.
+ru_stem runs a Russian Snowball stemmer on each word.
+
+
+Each dictionary is defined by an entry in the pg_ts_dict table:
+
+CREATE TABLE pg_ts_dict (
+   dict_id     int not null primary key,
+   dict_name   text not null,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lemmatize  oid not null,
+   dict_comment    text
+);
+
+The dict_id and dict_name
+serve as unique identifiers for the dictionary.
+The meaning of the dict_initoption varies among dictionaries,
+but for the built-in Snowball dictionaries
+it specifies a file from which stop words should be read.
+The dict_comment is a human-readable description of the dictionary.
+The other fields are internal function identifiers
+useful only to developers trying to implement their own dictionaries.
+
+The argument named dictionary
+in each of the following functions
+should be either an integer dict_id or a textual dict_name
+identifying which dictionary should be used for the operation;
+if omitted then the current dictionary is used.
+
+
+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID
+
+ Selects a current dictionary for use by functions
+ that do not select a dictionary explicitly.
+
+ CREATE FUNCTION lexize(
+ [ dictionary, ] word text)
+ RETURNS TEXT[]
+
+ Reduces a single word to a lexeme.
+ Note that lexemes are arrays of zero or more strings,
+ since in some languages there might be several base words
+ from which an inflected form could arise.
+
+
+Ranking
+
+Ranking attempts to measure how relevant documents are to particular queries
+by inspecting the number of times each search word appears in the document,
+and whether different search terms occur near each other.
+Note that this information is only available in unstripped vectors —
+ranking functions will only return a useful result
+for a tsvector which still has position information!
+
+Both of these ranking functions
+take an integer normalization option
+that specifies whether a document's length should impact its rank.
+This is often desirable,
+since a hundred-word document with five instances of a search word
+is probably more relevant than a thousand-word document with five instances.
+The option can have the values:
+
+
+0 (the default) ignores document length.
+1 divides the rank by the logarithm of the length.
+2 divides the rank by the length itself.
+
+
+The two ranking functions currently available are:
+
+
+
+ CREATE FUNCTION rank(
+  [ weights float4[], ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]
+  ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS,
+ and offers the ability to weight word instances more heavily
+ depending on how you have classified them.
+ The weights specify how heavily to weight each category of word:
+ 
+>{D-weight, A-weight, B-weight, C-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(

+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]

+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “
+>Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or less.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+


diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b


--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | [email protected]
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | [email protected]
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 |  
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 |  
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 |  
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 |  
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+


diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496


--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+# NOTE(review): CFG_DIR, CFG_MODNAME and CFG_OFILE look like placeholders filled in when gendict generates a dictionary - confirm against config.sh.
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+PG_CPPFLAGS =
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+# Include the contrib-wide makefile fragment from the top of the source tree.
+include $(top_srcdir)/contrib/contrib-global.mk


diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7


--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility helps people create dictionaries for the contrib/tsearch2
+module. In particular, it has built-in support for Snowball stemmers.
+
+The programming API for tsearch2 dictionaries is described in the tsearch2
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* The tsearch2 module sources must already be compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument for the -p option must be *the same* as the name of
+      the stemming function in stem.c (without _stem)
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample Portuguese words with their stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+        Note that the dictionary and the corresponding entry in the tsearch
+   configuration are now installed, and you may modify them using
+   plain SQL commands, for example to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercases the input word and removes it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please check the Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for additional information about the "Gendict tutorial" and dictionaries.
\ No newline at end of file


diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542


--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRL/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+# Option defaults; each one can be overridden on the command line.
+dictname=
+prefix=
+comment=
+dir=
+cfile=
+hfile=
+stemmode=no
+hasinit=no
+verbose=no
+
+# -v, -s and -i are plain switches; every other option stores its argument.
+while getopts n:c:C:h:d:p:vis opt
+do
+   case "$opt" in
+       n) dictname="$OPTARG";;
+       p) prefix="$OPTARG";;
+       C) comment="$OPTARG";;
+       d) dir="$OPTARG";;
+       c) cfile="$OPTARG";;
+       h) hfile="$OPTARG";;
+       s) stemmode=yes;;
+       i) hasinit=yes;;
+       v) verbose=yes;;
+       \?) usage;;
+   esac
+done
+
+[ ${#dictname} -eq 0 ] && usage
+
+dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'`
+
+if [ $stemmode = "yes" ] ; then 
+   [ ${#prefix} -eq 0 ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi 
+
+[ ${#dir}   -eq 0 ] && dir="dict_$dictname"
+
+if [ ${#comment} -eq 0 ]; then
+   comment=null
+else
+   comment="'$comment'"
+fi
+
+ofile=
+for f in $cfile
+do
+   f=` echo $f | sed 's#c$#o#'`
+   ofile="$ofile $f"
+done
+
+if [ $stemmode = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+if [ $verbose = "yes" ]; then
+   echo Dictname: "'"$dictname"'"
+   echo Snowball stemmer: $stemmode
+   echo Has init method: $hasinit
+   [ $stemmode = "yes" ] && echo Function prefix: $prefix 
+   echo Source files: $cfile
+   echo Header files: $hfile
+   echo Object files: $ofile
+   echo Comment: $comment
+   echo Directory: ../../$dir
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build directory...  '
+if [ ! -d ../../$dir ]; then
+   if ! mkdir ../../$dir ; then 
+       echo "Can't create directory ../../$dir"
+       exit 1
+   fi 
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+else
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+fi
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+   [ $verbose = "yes" ] && echo -n 'Copy source and header files...  '
+   if [ ${#cfile} -ne 0 ] ; then
+       if ! cp $cfile ../../$dir ; then 
+           echo "Cant cp all or one of files: $cfile"
+           exit 1
+       fi
+   fi
+   if [ ${#hfile} -ne 0 ] ; then 
+       if ! cp $hfile ../../$dir ; then 
+               echo "Cant cp all or one of files: $hfile"
+           exit 1
+       fi
+   fi
+   [ $verbose = "yes" ] && echo ok
+fi
+
+
+# Generate subinclude.h: one #include line per header given with -h.
+[ $verbose = "yes" ] && echo -n 'Build sub-include header...  '
+# Create/truncate with ':' instead of 'echo -n': an echo that does not
+# understand -n would write a literal "-n" line into the C header.
+: > ../../$dir/subinclude.h
+for i in $hfile
+do
+   echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+if  [ $stemmode = "yes" ] ; then 
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   [ $verbose = "yes" ] && echo -n 'Build dictinonary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else 
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi 
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n "Build README.$dictname...  "
+if  [ $stemmode = "yes" ] ; then
+   echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname
+else
+   echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname
+fi
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+


diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/* State of one generated Snowball dictionary, built by the init method. */
+typedef struct {
+   struct SN_env *z;                   /* stemmer environment from CFG_PREFIX_create_env() */
+   StopList    stoplist;               /* stop-word list */
+   int (*stem)(struct SN_env * z);     /* stemming routine, set to CFG_PREFIX_stem */
+} DictSnowball;
+
+
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * Init method of the generated Snowball dictionary.
+ *
+ * Allocates a zeroed DictSnowball with malloc(), loads and sorts the stop
+ * list when a non-null text argument is supplied, creates the stemmer
+ * environment and returns the state as a pointer Datum.
+ *
+ * The state is malloc'ed, so every error exit must release it by hand.
+ */
+Datum 
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+       
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       freestoplist(&(d->stoplist));
+       /* elog(ERROR) does not return: release the state first or it leaks */
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+


diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381


--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+HASINIT /* Dictionary state kept between calls: only a stop-word list. */
+HASINIT typedef struct {
+HASINIT    StopList    stoplist;
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT /*
+HASINIT  * Init method: allocates a zeroed DictExample with malloc(), loads and
+HASINIT  * sorts the stop list when a non-null text argument is given, and
+HASINIT  * returns the state as a pointer Datum.
+HASINIT  */
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT 
+HASINIT    if ( !d )
+HASINIT        elog(ERROR, "No memory");
+HASINIT    memset(d,0,sizeof(DictExample));
+HASINIT 
+HASINIT    d->stoplist.wordop=lowerstr;
+HASINIT    
+HASINIT    /* Your INIT code */
+HASINIT    
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+HASINIT        text       *in = PG_GETARG_TEXT_P(0);
+HASINIT        readstoplist(in, &(d->stoplist));
+HASINIT        sortstoplist(&(d->stoplist));
+HASINIT        PG_FREE_IF_COPY(in, 0);
+HASINIT    }
+HASINIT 
+HASINIT    PG_RETURN_POINTER(d);
+HASINIT }
+
+/*
+ * Lexize method of the template dictionary.
+ *
+ * Argument 1 is the input word and argument 2 its length; the word is
+ * copied with pnstrdup(). The result is a palloc'ed, NULL-terminated array
+ * of at most one lexeme. In the variant with an init method, an empty word
+ * or a word found in the stop list yields an empty array.
+ */
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+   char    **res=palloc(sizeof(char*)*2);
+
+   /* Your INIT dictionary code */
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0]=NULL;
+HASINIT    } else 
+       res[0]=txt;
+   res[1]=NULL;
+
+   PG_RETURN_POINTER(res);
+}


diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842


--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+-- Template for the SQL script that registers a generated dictionary.
+-- Lines starting with an upper-case marker word are kept or blanked by
+-- the generator script, depending on the kind of dictionary requested.
+SET search_path = public;
+BEGIN;
+
+-- init function (only for dictionaries that have an init method)
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+-- lexize function (only for non-Snowball dictionaries)
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+-- register the dictionary
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;


diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/* Key (as GISTTYPE *) of entry number 'pos' in a bytea-wrapped GISTENTRY array. */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/* Number of bits set in the byte 'val'. */
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/* Input function of the index key type: not supported, always raises an error. */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/* Output function of the index key type: not supported, always raises an error. */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/* qsort comparator: ascending order of int4 values. */
+static int
+compareint(const void *a, const void *b)
+{
+   int4        lhs = *((const int4 *) a);
+   int4        rhs = *((const int4 *) b);
+
+   if (lhs > rhs)
+       return 1;
+   if (lhs < rhs)
+       return -1;
+   return 0;
+}
+
+/*
+ * Sort the array 'a' of 'l' elements in place and squeeze out duplicates.
+ *
+ * Returns the number of distinct values, which then occupy the front of
+ * the array.
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+   int4       *ptr,
+              *res;
+
+   /*
+    * Nothing to sort or compact. This must cover l == 0 as well: the
+    * general path below would report one element for an empty array.
+    */
+   if (l <= 1)
+       return l;
+
+   ptr = res = a;
+
+   qsort((void *) a, l, sizeof(int4), compareint);
+
+   /* 'res' is the last kept value, 'ptr' scans the sorted input */
+   while (ptr - a < l)
+       if (*ptr != *res)
+           *(++res) = *ptr++;
+       else
+           ptr++;
+   return res + 1 - a;
+}
+
+/* Build the bit signature 'sign' of an array key: clear it, then hash in every element. */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+   int4       *item = GETARR(a);
+   int4       *end = item + ARRNELEM(a);
+
+   MemSet((void *) sign, 0, sizeof(BITVEC));
+   for (; item < end; item++)
+       HASH(sign, *item);
+}
+
+/*
+ * GiST compress method.
+ *
+ * Leaf entries (a tsvector) become an array key holding the sorted,
+ * de-duplicated CRC32 of every word; an array key longer than
+ * TOAST_INDEX_TARGET is replaced by a bit signature. A signature key
+ * whose bytes are all 0xff is replaced by the short ALLISTRUE form.
+ * Any other entry is returned unchanged.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       /* one CRC32 per word of the tsvector */
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* free the detoasted copy, if one was made */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /* keep the entry as it is unless every signature byte is 0xff */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * GiST decompress method: detoast the key. The incoming entry is returned
+ * as is when detoasting did not produce a new copy; otherwise a fresh
+ * entry pointing at the detoasted key is built.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+   GISTENTRY  *retval;
+
+   /* same pointer: nothing was detoasted */
+   if (key == (GISTTYPE *) DatumGetPointer(entry->key))
+       PG_RETURN_POINTER(entry);
+
+   retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+   gistentryinit(*retval, PointerGetDatum(key),
+                 entry->rel, entry->page,
+                 entry->offset, key->len, FALSE);
+
+   PG_RETURN_POINTER(retval);
+}
+
+/* Half-open range [arrb, arre) over a sorted int4 array. */
+typedef struct
+{
+   int4       *arrb;
+   int4       *arre;
+}  CHKVAL;
+
+/*
+ * Binary search: is val->val present in the sorted array described by
+ * the CHKVAL passed as 'checkval'?
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+   int4       *lo = ((CHKVAL *) checkval)->arrb;
+   int4       *hi = ((CHKVAL *) checkval)->arre;
+
+   /* the value, if present, always lies in [lo, hi) */
+   while (lo < hi)
+   {
+       int4       *mid = lo + (hi - lo) / 2;
+
+       if (*mid < val->val)
+           lo = mid + 1;
+       else if (*mid > val->val)
+           hi = mid;
+       else
+           return (true);
+   }
+
+   return (false);
+}
+
+/* Is the bit that val->val hashes to set in the signature 'checkval'? */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+   return GETBIT(checkval, HASHVAL(val->val));
+}
+
+/*
+ * GiST consistent method: can the key satisfy the query?
+ *
+ * An empty query never matches. Array keys are tested against the sorted
+ * array, signature keys against their bits, and an ALLISTRUE signature
+ * always matches.
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(entry->key);
+   CHKVAL      chkval;
+
+   if (!query->size)
+       PG_RETURN_BOOL(false);
+
+   if (!ISSIGNKEY(key))
+   {                           /* only leaf pages */
+       chkval.arrb = GETARR(key);
+       chkval.arre = chkval.arrb + ARRNELEM(key);
+       PG_RETURN_BOOL(TS_execute(
+                              GETQUERY(query),
+                              (void *) &chkval, true,
+                              checkcondition_arr
+                              ));
+   }
+
+   if (ISALLTRUE(key))
+       PG_RETURN_BOOL(true);
+
+   PG_RETURN_BOOL(TS_execute(
+                          GETQUERY(query),
+                          (void *) GETSIGN(key), false,
+                          checkcondition_bit
+                          ));
+}
+
+/*
+ * OR the key 'add' into the signature 'sbase'.
+ *
+ * Returns 1 (leaving sbase untouched) when 'add' is an ALLISTRUE key,
+ * 0 otherwise.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+   int4        i;
+   BITVECP     sadd = GETSIGN(add);
+
+   if (!ISSIGNKEY(add))
+   {
+       /* array key: hash each element into the signature */
+       int4       *hashes = GETARR(add);
+
+       for (i = 0; i < ARRNELEM(add); i++)
+           HASH(sbase, hashes[i]);
+       return 0;
+   }
+
+   if (ISALLTRUE(add))
+       return 1;
+
+   LOOPBYTE(
+       sbase[i] |= sadd[i];
+   );
+   return 0;
+}
+
+
+/*
+ * GiST union method: build a signature key covering every entry of the
+ * vector. The result is the short ALLISTRUE form as soon as one entry is
+ * ALLISTRUE. The size of the result is stored through argument 1.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   int        *size = (int *) PG_GETARG_POINTER(1);
+   int4        nentries = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+   int4        flag = SIGNKEY;
+   int4        keylen;
+   int4        i;
+   BITVEC      base;
+   GISTTYPE   *result;
+
+   MemSet((void *) base, 0, sizeof(BITVEC));
+
+   /* stop at the first ALLISTRUE entry: the union cannot grow further */
+   for (i = 0; i < nentries && !(flag & ALLISTRUE); i++)
+       if (unionkey(base, GETENTRY(entryvec, i)))
+           flag |= ALLISTRUE;
+
+   keylen = CALCGTSIZE(flag, 0);
+   result = (GISTTYPE *) palloc(keylen);
+   result->len = keylen;
+   result->flag = flag;
+   *size = keylen;
+   if (!ISALLTRUE(result))
+       memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * GiST same method: store in *result (argument 2) whether keys a and b
+ * are equal, and return that pointer.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+   GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+   GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+   bool       *result = (bool *) PG_GETARG_POINTER(2);
+   int4        i;
+
+   if (ISSIGNKEY(a))
+   {                           /* then b also ISSIGNKEY */
+       if (ISALLTRUE(a) || ISALLTRUE(b))
+       {
+           /* equal only when both are ALLISTRUE */
+           *result = (ISALLTRUE(a) && ISALLTRUE(b)) ? true : false;
+       }
+       else
+       {
+           BITVECP     sa = GETSIGN(a),
+                       sb = GETSIGN(b);
+
+           *result = true;
+           LOOPBYTE(
+               if (sa[i] != sb[i])
+               {
+                   *result = false;
+                   break;
+               }
+           );
+       }
+   }
+   else
+   {                           /* a and b ISARRKEY */
+       int4        lena = ARRNELEM(a),
+                   lenb = ARRNELEM(b);
+       int4       *ptra = GETARR(a),
+                  *ptrb = GETARR(b);
+
+       /* different lengths can never match; otherwise compare elementwise */
+       *result = (lena == lenb) ? true : false;
+       for (i = 0; *result && i < lena; i++)
+           if (ptra[i] != ptrb[i])
+               *result = false;
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/* Number of bits set in the signature 'sign'. */
+static int4
+sizebitvec(BITVECP sign)
+{
+   int4        i,
+               total = 0;
+
+   LOOPBYTE(
+       total += SUMBIT(sign[i]);
+   );
+   return total;
+}
+
+/* Hamming distance between two signatures: the number of differing bits. */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+   int i;
+   int dist = 0;
+
+   LOOPBIT(
+       dist += ( GETBIT(a,i) != GETBIT(b,i) ) ? 1 : 0;
+   );
+   return dist;
+}
+
+/*
+ * Hamming distance between two signature keys. An ALLISTRUE key counts
+ * as a signature with every bit set.
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+   if ( ISALLTRUE(a) && ISALLTRUE(b) )
+       return 0;
+   if ( ISALLTRUE(a) )
+       return SIGLENBIT-sizebitvec(GETSIGN(b));
+   if ( ISALLTRUE(b) )
+       return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+   return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * GiST penalty method: cost of adding the new entry under the original
+ * one, stored in *penalty (argument 2). The cost is a Hamming distance,
+ * except for an array key against an ALLISTRUE original, where it is the
+ * share of bits not set in the new key's signature.
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+   GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+   float      *penalty = (float *) PG_GETARG_POINTER(2);
+   GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+   GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+   BITVEC      sign;
+
+   if (!ISARRKEY(newval)) {
+       *penalty=hemdist(origval,newval);
+       PG_RETURN_POINTER(penalty);
+   }
+
+   /* array key: compare through its signature */
+   makesign(sign, newval);
+   if ( ISALLTRUE(origval) )
+       *penalty=((float)(SIGLENBIT-sizebitvec(sign)))/(float)(SIGLENBIT+1);
+   else
+       *penalty=hemdistsign(sign,GETSIGN(origval));
+
+   PG_RETURN_POINTER(penalty);
+}
+
+/* Signature of one entry; 'sign' is not filled in when 'allistrue' is set. */
+typedef struct
+{
+   bool        allistrue;
+   BITVEC      sign;
+}  CACHESIGN;
+
+/* Fill a cache item from a key of any kind (array, signature, ALLISTRUE). */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+   item->allistrue = (!ISARRKEY(key) && ISALLTRUE(key)) ? true : false;
+
+   if (ISARRKEY(key))
+       makesign(item->sign, key);
+   else if (!item->allistrue)
+       memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+}
+
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+/* An entry position paired with its split cost. */
+typedef struct
+{
+   OffsetNumber pos;
+   int4        cost;
+} SPLITCOST;
+
+/* qsort comparator: ascending order of SPLITCOST.cost. */
+static int
+comparecost(const void *a, const void *b)
+{
+   int4        lhs = ((const SPLITCOST *) a)->cost;
+   int4        rhs = ((const SPLITCOST *) b)->cost;
+
+   if (lhs > rhs)
+       return 1;
+   if (lhs < rhs)
+       return -1;
+   return 0;
+}
+
+
+/*
+ * Hamming distance between two cached signatures. An allistrue item
+ * counts as a signature with every bit set.
+ */
+static int
+hemdistcache(CACHESIGN   *a, CACHESIGN   *b) {
+   if ( a->allistrue && b->allistrue )
+       return 0;
+   if ( a->allistrue )
+       return SIGLENBIT-sizebitvec(b->sign);
+   if ( b->allistrue )
+       return SIGLENBIT-sizebitvec(a->sign);
+
+   return hemdistsign( a->sign, b->sign );
+}
+
+/*
+ * GiST picksplit method: distribute the entries of the vector between a
+ * left and a right group.
+ *
+ * 1. Cache a signature for every entry and choose as seeds the pair with
+ *    the largest Hamming distance.
+ * 2. Start each group's union key from its seed.
+ * 3. Visit the entries in ascending order of |dist(seed_1) - dist(seed_2)|
+ *    and put each one into the group whose union is nearer, with WISH_F
+ *    biasing the choice by the current group sizes; the chosen union is
+ *    widened to cover the entry.
+ *
+ * Fills v->spl_left / spl_right (each terminated by FirstOffsetNumber),
+ * the counts and the two union datums, and returns v.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+   OffsetNumber k,
+               j;
+   GISTTYPE   *datum_l,
+              *datum_r;
+   BITVECP     union_l,
+               union_r;
+   int4        size_alpha,
+               size_beta;
+   int4        size_waste,
+               waste = -1;
+   int4        nbytes;
+   OffsetNumber seed_1 = 0,
+               seed_2 = 0;
+   OffsetNumber *left,
+              *right;
+   OffsetNumber maxoff;
+   BITVECP     ptr;
+   int         i;
+   CACHESIGN  *cache;
+   SPLITCOST  *costvector;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+   cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+   fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+   /* seeds: the pair of entries that are farthest apart */
+   for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+       for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+           /* the cache is filled lazily during the first outer pass */
+           if (k == FirstOffsetNumber)
+               fillcache(&cache[j], GETENTRY(entryvec, j));
+
+           size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+           if (size_waste > waste) {
+               waste = size_waste;
+               seed_1 = k;
+               seed_2 = j;
+           }
+       }
+   }
+
+   left = v->spl_left;
+   v->spl_nleft = 0;
+   right = v->spl_right;
+   v->spl_nright = 0;
+
+   /* no pair was examined: fall back to the first two entries */
+   if (seed_1 == 0 || seed_2 == 0) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
+   /* form initial .. */
+   if (cache[seed_1].allistrue) {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_l->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_l->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+   }
+   if (cache[seed_2].allistrue) {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_r->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_r->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+   }
+
+   union_l=GETSIGN(datum_l);
+   union_r=GETSIGN(datum_r);
+   maxoff = OffsetNumberNext(maxoff);
+   fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+   /* sort before ... */
+   costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+   for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+       costvector[j - 1].pos = j;
+       size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+       size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+       costvector[j - 1].cost = abs(size_alpha - size_beta);
+   }
+   qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+   for (k = 0; k < maxoff; k++) {
+       j = costvector[k].pos;
+       if (j == seed_1) {
+           *left++ = j;
+           v->spl_nleft++;
+           continue;
+       } else if (j == seed_2) {
+           *right++ = j;
+           v->spl_nright++;
+           continue;
+       }
+
+       /*
+        * Distance from entry j to the left union. cache[j].sign is
+        * already a bare signature, so it must be used directly: GETSIGN()
+        * would skip a key header that is not there and read past it.
+        */
+       if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+               size_alpha=0;
+           else
+               size_alpha = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign
+               );
+       } else {
+           size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+       }
+
+       /* distance from entry j to the right union, same rules */
+       if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+               size_beta=0;
+           else
+               size_beta = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign
+               );
+       } else {
+           size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+       }
+
+       if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+           /* goes left: widen the left union to cover entry j */
+           if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_l) )
+                   MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_l[i] |= ptr[i];
+               );
+           }
+           *left++ = j;
+           v->spl_nleft++;
+       } else {
+           /* goes right: widen the right union to cover entry j */
+           if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_r) )
+                   MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_r[i] |= ptr[i];
+               );
+           }
+           *right++ = j;
+           v->spl_nright++;
+       }
+   }
+
+   /* terminate both offset lists */
+   *right = *left = FirstOffsetNumber;
+   pfree(costvector);
+   pfree(cache);
+   v->spl_ldatum = PointerGetDatum(datum_l);
+   v->spl_rdatum = PointerGetDatum(datum_r);
+
+   PG_RETURN_POINTER(v);
+}


diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74


--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+#define BITBYTE 8
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+#define LOOPBYTE(a) \
+       for(i=0;i
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i
+                               a;\
+               }
+
+/* bit-level access to a signature; 'i' is a bit number */
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+/* bit 'i' of the single byte 'x'; 'i' is parenthesized so any expression is safe */
+#define GETBITBYTE(x,i) ( ( ((char)(x)) >> (i) ) & 0x01 )
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+/* bit number a hash value maps to, and the macro that sets it */
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ *
+ * 'len' is the total size in bytes and 'flag' a combination of the bits
+ * below. The payload after the header is an int4 array (ARRKEY), a
+ * signature (SIGNKEY), or nothing (SIGNKEY | ALLISTRUE).
+ */
+typedef struct
+{
+   int4        len;
+   int4        flag;
+   char        data[1];
+}  GISTTYPE;
+
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+/* the argument is parenthesized so the casts apply to the whole expression */
+#define ISARRKEY(x) ( ((GISTTYPE*)(x))->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)(x))->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)(x))->flag & ALLISTRUE )
+
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+/* key size in bytes for the given flags; 'len' is the element count of an array key */
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+/* payload accessors; 'x' must point at a complete key, header included */
+#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)(x)+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)(x))->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "spell.h"
+
+/* longest word (in bytes) that NormalizeWord() accepts; also sizes its work buffers */
+#define MAXNORMLEN 56
+
+/* case-insensitive test whether x starts with y (0 when it does); y is evaluated twice */
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/* qsort() comparator: orders SPELL entries by their word using strcmp() */
+static int cmpspell(const void *s1,const void *s2){
+   const SPELL *left = (const SPELL *) s1;
+   const SPELL *right = (const SPELL *) s2;
+
+   return strcmp(left->word, right->word);
+}
+
+/* Lower-case a NUL-terminated string in place, byte by byte, via tolower() */
+static void 
+strlower( char * str ) {
+   unsigned char *p;
+
+   for (p = (unsigned char *) str; *p; p++)
+       *p = tolower( *p );
+}
+
+/*
+ * Backward string compare for suffix tree operations: the strings are
+ * compared from their last character towards the first. When one string
+ * is a tail of the other, the shorter one sorts first.
+ */
+static int 
+strbcmp(const char *s1, const char *s2) { 
+   int i1, i2;
+
+   for (i1 = strlen(s1) - 1, i2 = strlen(s2) - 1; i1 >= 0 && i2 >= 0; i1--, i2--) {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   /* one (or both) strings ran out: decide by the remaining length */
+   if (i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+/*
+ * Like strbcmp(), but compares at most `count` trailing characters;
+ * returns 0 as soon as `count` characters have matched.
+ */
+static int 
+strbncmp(const char *s1, const char *s2, size_t count) { 
+   int i1 = strlen(s1) - 1;
+   int i2 = strlen(s2) - 1;
+   int left = count;
+
+   for (; i1 >= 0 && i2 >= 0 && left > 0; i1--, i2--, left--) {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   if (left == 0 || i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+
+/*
+ * qsort() comparator for AFFIX entries: first by type, then by the
+ * replacement string - forward compare for prefixes ('p'), backward
+ * compare for everything else.
+ */
+static int 
+cmpaffix(const void *s1,const void *s2){
+   const AFFIX *a = (const AFFIX *) s1;
+   const AFFIX *b = (const AFFIX *) s2;
+
+   if (a->type != b->type)
+       return (a->type < b->type) ? -1 : 1;
+
+   return (a->type == 'p') ? strcmp(a->repl, b->repl)
+                           : strbcmp(a->repl, b->repl);
+}
+
+/*
+ * Append one dictionary entry (word plus its affix flags) to Conf->Spell,
+ * growing the array in steps of 1024*20 entries.
+ *
+ * The word is duplicated with strdup(). The flag string is copied into the
+ * fixed-size SPELL.flag buffer; flags that do not fit are truncated, and
+ * the buffer is always NUL-terminated because FindWord() later runs
+ * strchr() over it.
+ *
+ * Reports elog(ERROR) when memory cannot be obtained. Always returns 0.
+ */
+int 
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   SPELL *entry;
+
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell"); 
+   }
+   entry = &Conf->Spell[Conf->nspell];
+   entry->word=strdup(word);
+   if ( !entry->word ) 
+       elog(ERROR,"No memory for AddSpell");
+   /* strncpy() does not terminate on truncation, so reserve the last byte */
+   strncpy(entry->flag,flag,sizeof(entry->flag)-1);
+   entry->flag[sizeof(entry->flag)-1]='\0';
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * Load an ispell-style dictionary file: one "word" or "word/FLAGS" per
+ * line. The word is lower-cased, the flag part is cut at the first
+ * non-ASCII-letter, and the pair is handed to AddSpell().
+ * Returns 1 when the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportDictionary(IspellDict * Conf,const char *filename){
+   unsigned char line[BUFSIZ];  
+   FILE *fp = fopen(filename,"r");
+
+   if (fp == NULL)
+       return(1);
+
+   while (fgets(line,sizeof(line),fp)) {
+       unsigned char *p = strchr(line,'/');
+       const unsigned char *flag = "";
+
+       if (p) {
+           /* split "word/FLAGS": terminate the word, flags follow the slash */
+           *p++ = 0;
+           flag = p;
+           for (; *p; p++) {
+               int isletter = ((*p>='A')&&(*p<='Z'))||((*p>='a')&&(*p<='z'));
+
+               if (!isletter) {
+                   *p = 0;
+                   break;
+               }
+           }
+       }
+       /* only the word part is lower-cased: it ends at the NUL written above */
+       strlower(line);
+       /* strip line-end characters from the word */
+       for (p = line; *p; p++) {
+           if (*p=='\r' || *p=='\n')
+               *p = 0;
+       }
+       AddSpell(Conf,line,flag);
+   }
+   fclose(fp);
+   return(0);
+}
+
+
+/*
+ * Look up `word` in the sorted dictionary Conf->Spell.
+ *
+ * The search is restricted to the index range recorded in Conf->SpellTree
+ * for the word's first byte. An entry is accepted when its word equals
+ * `word` and either affixflag is 0 or the entry's flag string contains
+ * affixflag. Returns a pointer to the accepted entry, or NULL.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int l,c,r,resc,resl,resr, i;
+
+   /* range of entries whose word starts with the same byte */
+   i = (int)(*word) & 255;
+   l = Conf->SpellTree.Left[i];
+   r = Conf->SpellTree.Right[i];
+   /* -1 marks "no entry starts with this byte" */
+   if (l == -1) return (NULL);
+   while(l<=r){
+       /* each round probes the middle and both ends of the current range */
+       c = (l + r) >> 1;
+       resc = strcmp(Conf->Spell[c].word, word);
+       if( (resc == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[c]);
+       }
+       resl = strcmp(Conf->Spell[l].word, word);
+       if( (resl == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[l]);
+       }
+       resr = strcmp(Conf->Spell[r].word, word);
+       if( (resr == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[r]);
+       }
+       /*
+        * Narrow the range: halve it on the side indicated by the middle
+        * probe and additionally drop the end that was just probed. When
+        * the middle word matched but its flags did not, only the two
+        * ends are dropped.
+        */
+       if(resc < 0){
+           l = c + 1;
+           r--;
+       } else if(resc > 0){
+           r = c - 1;
+           l++;
+       } else {
+           l++;
+           r--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * Append one affix rule to Conf->Affix, growing the array 16 entries at a
+ * time.
+ *
+ *   flag - affix flag character the rule belongs to
+ *   mask - regular expression the stem must match; stored anchored with
+ *          a trailing '$' for suffixes (type 's') or a leading '^' otherwise
+ *   find - text stored in AFFIX.find
+ *   repl - text stored in AFFIX.repl (its length is cached in replen)
+ *   type - 's' for a suffix rule, anything else is treated as a prefix
+ *
+ * mask, find and repl are copied into fixed-size AFFIX members, so their
+ * lengths are validated first; an over-long value raises elog(ERROR)
+ * instead of overflowing the buffers. Also raises elog(ERROR) when out of
+ * memory. Always returns 0.
+ */
+int 
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   AFFIX *entry;
+
+   /* mask needs room for one anchor character plus the terminator */
+   if ( strlen(mask) + 2 > sizeof(Conf->Affix->mask) ||
+        strlen(find) + 1 > sizeof(Conf->Affix->find) ||
+        strlen(repl) + 1 > sizeof(Conf->Affix->repl) )
+       elog(ERROR,"Affix parameter is too long");
+
+   if(Conf->naffixes>=Conf->maffixes){
+       if(Conf->maffixes){
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }else{
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL ) 
+           elog(ERROR,"No memory for AddAffix");
+   }
+   entry = &Conf->Affix[Conf->naffixes];
+   if (type=='s') {
+       sprintf(entry->mask,"%s$",mask);
+   } else {
+       sprintf(entry->mask,"^%s",mask);
+   }
+   /* 1 = the regular expression has not been compiled yet */
+   entry->compile = 1;
+   entry->flag=flag;
+   entry->type=type;
+   
+   strcpy(entry->find,find);
+   strcpy(entry->repl,repl);
+   entry->replen=strlen(repl);
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy src to dist while dropping every space, '-' and tab character.
+ * dist is NUL-terminated and returned.
+ */
+static char * 
+remove_spaces(char *dist,char *src){
+   char *out = dist;
+   char *in;
+
+   for (in = src; *in; in++) {
+       if (*in==' ' || *in=='-' || *in=='\t')
+           continue;
+       *out++ = *in;
+   }
+   *out=0;
+   return(dist);
+}
+
+
+/*
+ * Load an ispell-style affix file and register every rule through
+ * AddAffix().
+ *
+ * Recognised lines:
+ *   "suffixes" / "prefixes"  - select the type of the rules that follow
+ *   "flag X"                 - set the flag for the rules that follow
+ *                              ('*' and spaces before X are skipped)
+ *   "mask > find , repl"     - a rule; with only two fields the second
+ *                              one is taken as repl and find is empty
+ * Text after '#' is ignored; spaces, tabs and '-' are removed from the
+ * three fields. Returns 1 when the file cannot be opened, 0 otherwise.
+ */
+int 
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           /*
+            * strchr() also matches the terminating NUL, so test for the
+            * end of the line explicitly; otherwise the scan would run
+            * past the buffer contents.
+            */
+           while(*s && strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       /* rules are only meaningful inside a suffixes/prefixes section */
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* str is reused as scratch space from here on */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               /* "mask > repl": the single field is the replacement */
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+       
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+       
+   }
+   fclose(affix);
+       
+   return(0);
+}
+
+/*
+ * Sort the dictionary by word and record, for every first byte value,
+ * the first (Left) and last (Right) index of the entries starting with
+ * that byte. Left is -1 for bytes that start no word.
+ */
+void 
+SortDictionary(IspellDict * Conf){
+   int prev = -1;
+   size_t idx;
+
+   qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+   for (idx = 0; idx < 256 ; idx++)
+       Conf->SpellTree.Left[idx] = -1;
+
+   for (idx = 0; idx < Conf->nspell; idx++) {
+       int first = (int)(*(Conf->Spell[idx].word)) & 255;
+
+       /* a new first byte begins here */
+       if (first != prev) {
+           Conf->SpellTree.Left[first] = idx;
+           prev = first;
+       }
+       Conf->SpellTree.Right[first] = idx;
+   }
+}
+
+/*
+ * Sort the affix rules with cmpaffix() and build the index ranges used
+ * for lookup: prefixes are keyed by the first byte of repl, all other
+ * rules by the last byte of repl (0 when repl is empty).
+ * Unused slots hold -1.
+ */
+void 
+SortAffixes(IspellDict * Conf) {
+   int lastPrefix = -1, lastSuffix = -1;
+   size_t n;
+
+   if (Conf->naffixes > 1)
+       qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
+
+   for (n = 0; n < 256; n++) {
+       Conf->PrefixTree.Left[n] = Conf->PrefixTree.Right[n] = -1;
+       Conf->SuffixTree.Left[n] = Conf->SuffixTree.Right[n] = -1;
+   }
+
+   for (n = 0; n < Conf->naffixes; n++) {
+       AFFIX *entry = &(((AFFIX*)Conf->Affix)[n]);
+       Tree_struct *tree;
+       int *last;
+       int key;
+
+       if (entry->type == 'p') {
+           key = (int)(*(entry->repl)) & 255;
+           tree = &Conf->PrefixTree;
+           last = &lastPrefix;
+       } else {
+           key = (entry->replen) ? (int)(entry->repl[entry->replen-1]) & 255 : 0;
+           tree = &Conf->SuffixTree;
+           last = &lastSuffix;
+       }
+
+       /* first rule seen for this key opens its range */
+       if (*last != key) {
+           tree->Left[key] = n;
+           *last = key;
+       }
+       tree->Right[key] = n;
+   }
+}
+
+/*
+ * Try to undo one suffix rule on `word` (of length len).
+ *
+ * *res receives the backward comparison of the word's tail with
+ * Affix->repl. When the tail matches, the tail is replaced by
+ * Affix->find; if the result matches the rule's mask and is found in the
+ * dictionary with the rule's flag, a pstrdup'ed copy is returned.
+ * In every other case (including a mask that fails to compile) the
+ * result is NULL.
+ */
+static char * 
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) {
+   regmatch_t subs[2]; /* workaround for apache&linux */
+   char newword[2*MAXNORMLEN] = "";
+   int err;
+
+   *res = strbncmp(word, Affix->repl, Affix->replen);
+   if (*res != 0)
+       return NULL;
+
+   /* rebuild the candidate stem: word minus repl, plus find */
+   strcpy(newword, word);
+   strcpy(newword+len-Affix->replen, Affix->find);
+
+   /* the mask is compiled on first use */
+   if (Affix->compile) {
+       err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+       if (err) {
+           /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+           regfree(&(Affix->reg));
+           return(NULL);
+       }
+       Affix->compile = 0;
+   }
+
+   err = regexec(&(Affix->reg),newword,1,subs,0);
+   if (err)
+       return NULL;
+
+   return FindWord(Conf, newword, Affix->flag) ? pstrdup(newword) : NULL;
+}
+
+/* number of regmatch_t pairs reserved for regexec() */
+#define NS 1
+/* capacity (in pointers) of the array of normal forms, terminator included */
+#define MAX_NORM 512
+/*
+ * Try to undo one prefix rule on `word`.
+ *
+ * Returns the strncmp() result of the word's head against Affix->repl
+ * when they differ, so the caller can steer its search; returns 0 in all
+ * other cases (including a mask that fails to compile).
+ *
+ * When the head matches, repl is replaced by find. If the result matches
+ * the rule's mask:
+ *   - and is found in the dictionary with the rule's flag, it is appended
+ *     to the NULL-terminated output array through *cur;
+ *   - additionally the suffix rule at Conf->SuffixTree.Left[pi] (if any)
+ *     is tried on it via CheckSuffix() and a hit is appended as well.
+ * `forms` is the start of the output array, used to enforce MAX_NORM.
+ */
+static int 
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur ) {
+  regmatch_t subs[NS*2];
+  char newword[2*MAXNORMLEN] = "";
+  int err, ls, res, lres;
+  size_t newlen;
+  AFFIX *CAffix = Conf->Affix;
+  
+  res = strncmp(word, Affix->repl, Affix->replen);
+  if (res != 0) {
+    return res;
+  }
+  /* candidate stem: find followed by the word without its repl head */
+  strcpy(newword, Affix->find);
+  strcat(newword, word+Affix->replen);
+
+  /* the mask is compiled on first use */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return (0);
+    }
+    Affix->compile = 0;
+  }
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    SPELL * curspell;
+
+    if((curspell=FindWord(Conf, newword, Affix->flag))){
+      /* keep one slot free for the NULL terminator */
+      if ((*cur - forms) < (MAX_NORM-1)) {
+   **cur =  pstrdup(newword);
+   (*cur)++; **cur = NULL;
+      }
+    } 
+    newlen = strlen(newword);
+    /* only the first suffix rule of bucket `pi` is tried here */
+    ls = Conf->SuffixTree.Left[pi];
+      if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
+   **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
+   if (**cur) {
+     (*cur)++; **cur = NULL;
+   }
+      }
+  }
+  return 0;
+}
+
+
+/*
+ * Compute the normal (dictionary) forms of `word`.
+ *
+ * The word is lower-cased in place. The result is a palloc'ed,
+ * NULL-terminated array of pstrdup'ed strings holding at most MAX_NORM-1
+ * forms, or NULL when no form is found. NULL is also returned for an
+ * empty word and for words longer than MAXNORMLEN.
+ */
+char ** 
+NormalizeWord(IspellDict * Conf,char *word){
+   size_t len;
+   char **forms;
+   char **cur;
+   AFFIX *Affix;
+   int ri, pi, ipi, lp, rp, cp, ls, rs;
+   int lres, rres, cres = 0;
+   SPELL *spell;
+
+   len=strlen(word);
+   /*
+    * An empty word has no last character: without this guard word[-1]
+    * would be read below, and a zero `pi` would make the
+    * "ipi += pi" loop spin forever.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return(NULL);
+
+   strlower(word);
+
+   forms=(char **) palloc(MAX_NORM*sizeof(char *));
+   cur=forms;*cur=NULL;
+
+   /* ri: first byte (prefix bucket), pi: last byte (suffix bucket) */
+   ri = (int)(*word) & 255;
+   pi = (int)(word[len-1]) & 255;
+   Affix=(AFFIX*)Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if((spell = FindWord(Conf, word, 0))){
+       *cur=pstrdup(word);
+       cur++;*cur=NULL;
+   }
+
+   /* Find all other NORMAL forms of the 'word' */
+
+   /* two passes: suffix bucket 0 (empty repl) and the bucket of the last byte */
+   for (ipi = 0; ipi <= pi; ipi += pi) {
+
+       /* check prefix */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp) {
+           cp = (lp + rp) >> 1;
+           cres = 0;
+           if ((cur - forms) < (MAX_NORM-1)) {
+               cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+           }
+           if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+           }
+           if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+           }
+           /* narrow the range using the middle probe, dropping the probed ends */
+           if (cres < 0) {
+               rp = cp - 1;
+               lp++;
+           } else if (cres > 0) {
+               lp = cp + 1;
+               rp--;
+           } else {
+               lp++;
+               rp--;
+           }
+       }
+
+       /* check suffix: walk the bucket from both ends towards the middle */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       while (ls >= 0 && ls <= rs) {
+           if (  ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           ls++;
+           rs--;
+       } /* end while */
+
+   } /* for ipi */
+
+   if(cur==forms){
+       pfree(forms);
+       return(NULL);
+   }
+   return(forms);
+}
+
+/*
+ * Release everything owned by an IspellDict: compiled regular
+ * expressions, the strdup'ed words, both arrays, and finally reset the
+ * structure to all zeroes.
+ */
+void 
+FreeIspell (IspellDict *Conf) {
+  int i;
+  AFFIX *Affix = (AFFIX *)Conf->Affix;
+
+  /* compile == 0 means the rule's regex was compiled and must be freed */
+  for (i = 0; i < Conf->naffixes; i++) {
+    if (Affix[i].compile == 0) {
+      regfree(&(Affix[i].reg));
+    }
+  }
+  /* the words belong to the Spell array, so iterate over nspell entries */
+  for (i = 0; i < Conf->nspell; i++) {
+   free( Conf->Spell[i].word );
+  }
+  free(Conf->Affix);
+  free(Conf->Spell);
+  memset( (void*)Conf, 0, sizeof(IspellDict) );
+  return;
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include 
+#include 
+
+/* One dictionary entry: a word and the affix flags attached to it. */
+typedef struct spell_struct {
+        /* the word itself */
+        char * word; 
+        /* affix flag characters applicable to the word */
+        char flag[10];
+} SPELL;
+
+/* One affix rule. */
+typedef struct aff_struct {   
+        /* affix flag character this rule belongs to */
+        char flag;
+        /* rule type character */
+        char type;
+        /* regular expression source text for this rule */
+        char mask[33];
+        /* "find" text of the rule */
+        char find[16];
+        /* "repl" text of the rule */
+        char repl[16];
+        /* compiled regular expression */
+        regex_t reg;
+        /* length of repl */
+        size_t replen;
+        /* compilation state flag for reg */
+        char compile;
+} AFFIX;
+
+/*
+ * Per-byte index ranges: for each of the 256 possible byte values,
+ * Left/Right hold the first and last array index of a bucket.
+ */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+/* Complete state of one ispell dictionary: affix rules, words and lookup indexes. */
+typedef struct {
+   /* allocated capacity of Affix */
+   int maffixes;
+   /* number of affix rules in use */
+   int naffixes;
+   AFFIX * Affix;
+
+   /* number of words in use */
+   int nspell;
+   /* allocated capacity of Spell */
+   int mspell;
+   SPELL   *Spell;
+   /* index ranges into Spell (SpellTree) and Affix (PrefixTree, SuffixTree) */
+   Tree_struct SpellTree;
+   Tree_struct PrefixTree;
+   Tree_struct SuffixTree;
+
+} IspellDict;
+
+char ** NormalizeWord(IspellDict * Conf,char *word);
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+/*
+ * Parser states used by parse_cfgdict():
+ *   CS_WAITKEY   - skipping whitespace before a key
+ *   CS_INKEY     - inside a key
+ *   CS_WAITEQ    - key finished, waiting for '='
+ *   CS_WAITVALUE - '=' seen, waiting for the value to start
+ *   CS_INVALUE   - inside a double-quoted value
+ *   CS_IN2VALUE  - inside an unquoted value
+ *   CS_WAITDELIM - value finished, waiting for ','
+ *   CS_INESC     - one escaped character; parsing resumes in CS_INVALUE
+ *   CS_IN2ESC    - one escaped character; parsing resumes in CS_IN2VALUE
+ */
+#define CS_WAITKEY 0
+#define CS_INKEY   1
+#define CS_WAITEQ  2
+#define CS_WAITVALUE   3
+#define CS_INVALUE 4
+#define CS_IN2VALUE    5
+#define CS_WAITDELIM   6
+#define CS_INESC   7
+#define CS_IN2ESC  8
+
+/*
+ * Return a palloc'ed, NUL-terminated copy of the first `len` bytes of
+ * `ptr` with backslash escapes resolved: each backslash is dropped and
+ * the character after it is kept literally. A lone backslash at the very
+ * end of the input is dropped.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+   char *res=palloc(len+1), *cptr;
+   memcpy(res,ptr,len);
+   res[len]='\0';
+   /* un-escape in place: ptr reads, cptr writes (never ahead of ptr) */
+   cptr = ptr = res;
+   while(*ptr) {
+       if ( *ptr == '\\' ) {
+           ptr++;
+           /*
+            * Trailing backslash: nothing follows it. Stop here, otherwise
+            * the read pointer would step over the terminator and run
+            * past the end of the buffer.
+            */
+           if ( *ptr == '\0' )
+               break;
+       }
+       *cptr=*ptr; ptr++; cptr++;
+   }
+   *cptr='\0';
+
+   return res;
+}
+
+/*
+ * Parse a configuration string of the form
+ *     key = value [, key = value ...]
+ * into a palloc'ed array of Map entries stored in *m. The array is
+ * zero-filled and sized so that at least one all-zero entry follows the
+ * last parsed pair.
+ *
+ * Keys consist of alphabetic characters. A value is either enclosed in
+ * double quotes or runs up to the next whitespace or comma; in both forms
+ * a backslash escapes the following character. Malformed input is
+ * reported with elog(ERROR).
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+   Map *mptr;
+   char *ptr=VARDATA(in), *begin=NULL;
+   /* int, not char: a long input may hold more commas than a char can count */
+   int num=0;
+   int state=CS_WAITKEY;
+
+   /* commas give an upper bound on the number of key/value pairs */
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if ( *ptr==',' ) num++;
+       ptr++;
+   }
+
+   *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+   memset(mptr, 0, sizeof(Map)*(num+2) );
+   ptr=VARDATA(in);
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       /* ctype functions require an unsigned char value */
+       unsigned char ch = (unsigned char) *ptr;
+
+       if (state==CS_WAITKEY) {
+           if (isalpha(ch)) {
+               begin=ptr;
+               state=CS_INKEY;
+           } else if ( !isspace(ch) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if (state==CS_INKEY) {
+           if ( isspace(ch) ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITEQ;
+           } else if ( *ptr=='=' ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITVALUE;
+           } else if ( !isalpha(ch) ) 
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITEQ ) {
+           if ( *ptr=='=' )
+               state=CS_WAITVALUE;
+           else if ( !isspace(ch) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITVALUE ) {
+           if ( *ptr=='"' ) {
+               begin=ptr+1;
+               state=CS_INVALUE;
+           } else if ( !isspace(ch) ) {
+               begin=ptr;
+               state=CS_IN2VALUE;
+           }
+       } else if ( state==CS_INVALUE ) {
+           if ( *ptr=='"' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_INESC;
+       } else if ( state==CS_IN2VALUE ) {
+           if ( isspace(ch) || *ptr==',' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               /* escape inside an unquoted value must return to CS_IN2VALUE */
+               state=CS_IN2ESC;
+       } else if ( state==CS_WAITDELIM ) {
+           if ( *ptr==',' ) 
+               state=CS_WAITKEY; 
+           else if ( !isspace(ch) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state == CS_INESC ) {
+           /* the escaped character is consumed without interpretation */
+           state=CS_INVALUE;
+       } else if ( state == CS_IN2ESC ) {
+           state=CS_IN2VALUE;
+       } else 
+           elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int) (ptr-VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may legitimately end at the end of the input */
+   if (state==CS_IN2VALUE) {
+       mptr->value = nstrdup(begin, ptr-begin);
+       mptr++;
+   } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) ) 
+       elog(ERROR,"Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery input is parsed with the text parser
+ * and morphology is applied to it.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored in a plain (flat) view. This means that in the
+ * array of nodes the right child is always the next item and the left
+ * child is at position item+item->left
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+/* token and item types used by the query parser */
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+/* an operand, '!' or '(' is expected next */
+#define WAITOPERAND 1
+/* '&', '|', ')' or the end of input is expected next */
+#define WAITOPERATOR   2
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ *
+ * Nodes form a singly linked list; pushquery() prepends each new node.
+ */
+typedef struct NODE
+{
+   /* weight bitmask of an operand */
+   int2        weight;
+   /* item type: VAL, OPR, VALTRUE, ... */
+   int2        type;
+   /* operator character for OPR items, crc32 of the operand for values */
+   int4        val;
+   /* offset of the operand text inside the operand buffer */
+   int2        distance;
+   /* length of the operand text */
+   int2        length;
+   /* next node in the list */
+   struct NODE *next;
+}  NODE;
+
+/* Working state of the query parser. */
+typedef struct
+{
+   /* current read position in the query string */
+   char       *buf;
+   /* WAITOPERAND or WAITOPERATOR */
+   int4        state;
+   /* current nesting depth of parentheses */
+   int4        count;
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   /* allocated size of op */
+   int4        lenop;
+   /* total bytes stored in op, terminators included */
+   int4        sumlen;
+   /* buffer with all operand strings, each NUL-terminated */
+   char       *op;
+   /* write position inside op */
+   char       *curop;
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional weight suffix of the form ":abcd" at `buf`.
+ *
+ * *weight is set to a bitmask: 'a' -> bit 3, 'b' -> bit 2, 'c' -> bit 1,
+ * 'd' -> bit 0 (letters are case-insensitive and may repeat); it is 0
+ * when no suffix is present. Returns the position of the first character
+ * that is not part of the suffix.
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+   *weight = 0;
+
+   if ( *buf != ':' )
+       return buf;
+
+   buf++;
+   while( *buf ) {
+       /* tolower() requires an unsigned char value */
+       switch(tolower((unsigned char) *buf)) {
+           case 'a': *weight |= 1<<3; break; 
+           case 'b': *weight |= 1<<2; break; 
+           case 'c': *weight |= 1<<1; break; 
+           case 'd': *weight |= 1;    break;
+           default: return buf; 
+       }
+       buf++;
+   }
+   
+   return buf;
+}
+
+/*
+ * get token from query string
+ *
+ * Returns the token type. For OPR, *val holds the operator character;
+ * for VAL, *strval / *lenval describe the operand text and *weight its
+ * weight mask. Spaces between tokens are skipped.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               if (*(state->buf) == '!')
+               {
+                   /* negation is a prefix operator: still waiting for an operand */
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* hand the operand over to the tsvector value parser */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       /* continue after the operand and its optional weight suffix */
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   /* more ')' than '(' is an error */
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* end of input with open parentheses left is an error */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       /* only reached for a skipped space */
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Prepend a new item to the reverse polish notation list kept in
+ * state->str and count it in state->num. Raises an error when distance
+ * or lenval exceed the limits MAXSTRPOS / MAXSTRLEN.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+
+   node->weight = weight;
+   node->type = type;
+   node->val = val;
+   node->distance = distance;
+   node->length = lenval;
+
+   /* link in at the head of the list */
+   node->next = state->str;
+   state->str = node;
+   state->num += 1;
+}
+
+/*
+ * Value handler used for tsquery parsing: the operand is stored exactly
+ * as given. An item carrying the operand's crc32 is pushed, and the
+ * operand text (NUL-terminated) is appended to the operand buffer, which
+ * is doubled in size as often as needed.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   int4        used;
+
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   /* offset at which this operand will be stored */
+   used = state->curop - state->op;
+
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             used, lenval, weight);
+
+   while (used + lenval + 1 >= state->lenop)
+   {
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+   }
+   /* the buffer may have moved: re-derive the write position */
+   state->curop = state->op + used;
+
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop[lenval] = '\0';
+   state->curop += lenval + 1;
+   state->sumlen += lenval + 1;
+   return;
+}
+
+/*
+ * Value handler used for morph parsing: the operand is run through the
+ * text parser of the configuration state->cfg_id. Every resulting word
+ * is pushed as a value, and from the second word on an '&' operator is
+ * pushed after it, so that the words are AND-ed together. When parsing
+ * yields no words, a single VALTRUE item is pushed instead.
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT         prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   /* prs.curwords is the number of words produced by the parser */
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }   
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 ) 
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+/* capacity of the operator stack of one makepol() invocation */
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens until the end of input or a closing parenthesis and emits
+ * items through pushquery() / pushval. Operators are kept on a local
+ * stack until their operands have been emitted; a parenthesized
+ * sub-expression is handled by a recursive call. Returns END on success;
+ * a syntax error is reported with elog(ERROR).
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* the operand is complete: emit pending '&' and '!' operators */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               /* a '|' met while operators are pending is emitted right away */
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* parse the parenthesized sub-expression recursively */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* end of this sub-expression: flush all pending operators */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush all pending operators */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/* Context handed to checkcondition_str() while a query is evaluated against one tsvector. */
+typedef struct
+{
+   /* first entry of the tsvector's word array */
+   WordEntry  *arrb;
+   /* one past the last entry of the word array */
+   WordEntry  *arre;
+   /* string area of the tsvector (WordEntry.pos is an offset into it) */
+   char       *values;
+   /* operand strings of the query (ITEM.distance is an offset into it) */
+   char       *operand;
+}  CHKVAL;
+
+/*
+ * Compare a tsvector word with a query operand: shorter strings sort
+ * first, strings of equal length are compared with strncmp().
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(chkval->values + ptr->pos,
+                  chkval->operand + item->distance,
+                  item->length);
+}
+
+/*
+ * check weight info
+ *
+ * Returns true when at least one position of the word `val` carries a
+ * weight that is selected in the query item's weight mask. The position
+ * list is read from the string area right after the (short-aligned)
+ * word text: a uint16 count followed by that many WordEntryPos entries.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       /* the item's mask has one bit per weight class */
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false; 
+}
+
+/*
+ * Binary search for the query operand `val` in the sorted word array of
+ * the tsvector described by checkval (a CHKVAL *). A found word counts
+ * as a match unless the operand has a weight mask and the word has
+ * position data; in that case the weights decide.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *chk = (CHKVAL *) checkval;
+   WordEntry  *lo = chk->arrb;
+   WordEntry  *hi = chk->arre;
+
+   /* Loop invariant: lo <= val < hi */
+
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = ValCompare(chk, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+       {
+           if (val->weight && mid->haspos)
+               return checkclass_str(chk, mid, val);
+           return true;
+       }
+   }
+
+   return (false);
+}
+
+/*
+ * Evaluate the boolean query tree rooted at curitem.
+ *
+ * Values are tested through chkcond(checkval, item). For an operator
+ * the right operand is the next item and the left operand is at
+ * curitem + curitem->left. When calcnot is false, a '!' node evaluates
+ * to true without looking at its operand.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       return (TS_execute(curitem + 1, checkval, calcnot, chkcond)) ? false : true;
+   }
+
+   if (curitem->val == (int4) '&')
+   {
+       /* short-circuit: the right side is only evaluated if the left holds */
+       if (!TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+           return false;
+       return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+   }
+
+   /* |-operator */
+   if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+       return true;
+   return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+}
+
+/*
+ * boolean operations
+ *
+ * rexectsq is exectsq with its two arguments swapped.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(1);
+   Datum       second = PG_GETARG_DATUM(0);
+
+   return DirectFunctionCall2(exectsq, first, second);
+}
+
+/*
+ * Test whether the tsvector (argument 0) satisfies the query
+ * (argument 1). The result is false when either of them is empty.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   bool        result = false;
+
+   if (val->size && query->size)
+   {
+       CHKVAL      chkval;
+
+       chkval.arrb = ARRPTR(val);
+       chkval.arre = chkval.arrb + val->size;
+       chkval.values = STRPTR(val);
+       chkval.operand = GETOPERAND(query);
+       result = TS_execute(GETQUERY(query), &chkval, true, checkcondition_str);
+   }
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * Walk the item array starting at ptr[*pos] and fill in the 'left'
+ * field of every node:
+ *   value nodes (VAL, VALTRUE) get 0,
+ *   '!' gets 1,
+ *   a binary operator gets the distance from itself to the operand
+ *   that follows its first sub-expression.
+ * On return *pos is the index just past the sub-expression handled.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+   ITEM       *node = &ptr[*pos];
+   int4        start = *pos;
+
+#ifdef BS_DEBUG
+   elog(DEBUG3, (node->type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, node->val);
+#endif
+
+   /* every kind of node consumes exactly one slot itself */
+   (*pos)++;
+
+   if (node->type == VAL || node->type == VALTRUE)
+   {
+       node->left = 0;
+       return;
+   }
+
+   if (node->val == (int4) '!')
+   {
+       node->left = 1;
+       findoprnd(ptr, pos);
+       return;
+   }
+
+   /* binary operator: first sub-expression, record offset, second one */
+   findoprnd(ptr, pos);
+   node->left = *pos - start;
+   findoprnd(ptr, pos);
+}
+
+
+/*
+ * Parse the query text in buf and build a QUERYTYPE from it.
+ *
+ * buf     - query text to parse
+ * pushval - callback passed on to makepol()
+ * cfg_id  - stored in the parser state (state.cfg_id)
+ *
+ * Returns a palloc'ed QUERYTYPE; raises ERROR "Empty query" when
+ * parsing produced no items.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* build the final structure: header, state.num items, operand text */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /*
+    * copy the items from the linked list in state.str into the array,
+    * freeing each list node once it has been copied
+    */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* copy the operand text (state.sumlen bytes) behind the item array */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   /* dump every item into pbuf; note that writes into pbuf are not bounds-checked */
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * Input function without morphology: the argument string is parsed
+ * by queryin() with pushval_asis as value callback and cfg_id 0.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   char       *input = (char *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *parsed = queryin(input, pushval_asis, 0);
+
+   PG_RETURN_POINTER(parsed);
+}
+
+/*
+ * out function
+ *
+ * INFIX is the working state of infix(): the position in the item
+ * array plus a growable, '\0'-terminated output buffer.
+ */
+typedef struct
+{
+   ITEM       *curpol;         /* item to be printed next */
+   char       *buf;            /* start of the output buffer */
+   char       *cur;            /* current write position inside buf */
+   char       *op;             /* base of the operand text; items add their 'distance' to it */
+   int4        buflen;         /* allocated size of buf */
+}  INFIX;
+
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * Recursively walk the query starting at in->curpol and append its
+ * infix (human-readable) text to in->buf.
+ *
+ * in    - walk/output state; in->curpol is advanced past everything printed
+ * first - when false, an '|' expression is wrapped in "( ... )"
+ *
+ * The buffer is kept '\0'-terminated, with in->cur pointing at the
+ * terminator.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       /*
+        * worst case: every character escaped (length * 2), two quotes,
+        * and ':' followed by up to four weight letters
+        */
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           /* a quote inside the value is prefixed with a backslash */
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       /* weight is a bit mask: bit 3 -> A, bit 2 -> B, bit 1 -> C, bit 0 -> D */
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       /* an operator as operand of '!' is put in parentheses */
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm;
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       /*
+        * The operand stored directly after the operator is printed last,
+        * so it is rendered into a scratch buffer of its own first.
+        */
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;
+       infix(in, false);
+
+       /* print operator & right operand: " <op> " is 3 bytes plus the scratch text */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function: render the query (argument 0) as infix text.
+ *
+ * A query of size 0 is returned as an empty string.  The detoasted
+ * copy of the argument, if any, is released on every return path.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+
+   if (query->size == 0)
+   {
+       char       *b = palloc(1);
+
+       *b = '\0';
+       /* do not leak the detoasted copy on the early return */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(b);
+   }
+   nrm.curpol = GETQUERY(query);
+   nrm.buflen = 32;
+   nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+   *(nrm.cur) = '\0';
+   nrm.op = GETOPERAND(query);
+   infix(&nrm, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(nrm.buf);
+}
+
+/*
+ * Debug function: show the query as it looks after clean_NOT_v2()
+ * has been applied to it.
+ *
+ * Returns an empty text for a query of size 0, the single character
+ * 'T' when clean_NOT_v2() leaves nothing, and otherwise the infix form
+ * of the cleaned query.  The detoasted argument copy and all scratch
+ * memory are released on every path.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+   text       *res;
+   ITEM       *q;
+   int4        len;
+
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       /* do not leak the detoasted copy on the early return */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(res);
+   }
+
+   q = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (!q)
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+   else
+   {
+       int4        outlen;
+
+       nrm.curpol = q;
+       nrm.buflen = 32;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+       *(nrm.cur) = '\0';
+       nrm.op = GETOPERAND(query);
+       infix(&nrm, true);
+
+       /* text values carry no terminator: copy exactly the bytes written */
+       outlen = nrm.cur - nrm.buf;
+       res = (text *) palloc(outlen + VARHDRSZ);
+       VARATT_SIZEP(res) = outlen + VARHDRSZ;
+       memcpy(VARDATA(res), nrm.buf, outlen);
+       pfree(nrm.buf);
+       pfree(q);
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * Build a query from text (argument 1) using the configuration id in
+ * argument 0: the text is parsed by queryin() with pushval_morph and
+ * the result is passed through clean_fakeval_v2().
+ *
+ * When clean_fakeval_v2() returns nothing the query is turned into an
+ * empty one (len = HDRSIZEQT, size = 0).  Otherwise the cleaned items
+ * are copied over the start of the item array; query->len and
+ * query->size are left as queryin() set them.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text       *src = PG_GETARG_TEXT_P(1);
+   char       *cstr = text2char(src);
+   QUERYTYPE  *query;
+   ITEM       *cleaned;
+   int4        nitems;
+
+   PG_FREE_IF_COPY(src,1);
+
+   query = queryin(cstr, pushval_morph, PG_GETARG_INT32(0));
+   cleaned = clean_fakeval_v2(GETQUERY(query), &nitems);
+   if (cleaned)
+   {
+       memcpy((void *) GETQUERY(query), (void *) cleaned, nitems * sizeof(ITEM));
+       pfree(cleaned);
+   }
+   else
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+   }
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * to_tsquery() variant taking the configuration by name (argument 0)
+ * and the query text in argument 1.  The name is translated with
+ * name2id_cfg() and the work is delegated to to_tsquery().
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+
+   /*
+    * name was fetched from argument 0, so that is the slot it must be
+    * compared against; comparing with slot 1 would pfree() the
+    * caller's datum whenever no detoasted copy had been made.
+    */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsquery() variant using the configuration id returned by
+ * get_currcfg(); the query text is argument 0.
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum cfg = Int32GetDatum( get_currcfg() );
+   Datum res = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));
+
+   PG_RETURN_DATUM(res);
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * One node of a query stored in polish notation, with a link from an
+ * operator to its left operand.
+ */
+typedef struct ITEM
+{
+   int8        type;           /* node kind: one of the constants defined below (VAL, OPR, ...) */
+   int8        weight;         /* bit mask of requested weights, 0 if none */
+   int2        left;           /* offset (in items) from an operator to its left operand */
+   int4        val;            /* for OPR nodes the operator character ('!', '&', '|') */
+   /* user-friendly value, must correlate with WordEntry */
+   uint32  
+       unused:1,
+       length:11,              /* length of the operand text */
+       distance:20;            /* offset of the operand text from GETOPERAND() */
+}  ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ */
+typedef struct
+{
+   int4        len;            /* total size in bytes, see COMPUTESIZE */
+   int4        size;           /* number of ITEMs */
+   char        data[1];        /* ITEM array followed by the operand text */
+}  QUERYTYPE;
+
+/*
+ * Layout helpers.  Every macro argument and every result is
+ * parenthesized so the macros can be used with arbitrary expressions
+ * and combined with postfix operators such as [] or ->.
+ */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+/* token / node kinds */
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+/*
+ * Evaluate a query tree; chkcond is called for value nodes and, when
+ * calcnot is false, '!' sub-expressions count as true.
+ */
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevance ranking
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+/* SQL-callable entry points defined in this file */
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+/*
+ * Default weight table, indexed by the 'weight' field of a WordEntryPos
+ * (see wpos below); used by rank_def() and as fallback for negative
+ * entries of the array given to rank().
+ */
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+/*
+ * Weight of the position wep points to.  NOTE: relies on a variable
+ * named 'w' (the weight table) being in scope at the point of use.
+ */
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+/* normalization method used when the caller passes none (0 = no normalization) */
+#define DEF_NORM_METHOD    0
+
+/*
+ * Weight of a word collocation as a function of the distance w between
+ * the two words.  Distances above 100 get the fixed tiny value 1e-30;
+ * below that the weight falls off exponentially with the distance.
+ */
+static float4 word_distance ( int4 w ) {
+   float4 result;
+
+   if ( w <= 100 )
+       result = 1.0/(1.005+0.05*exp( ((float4)w)/1.5-2) );
+   else
+       result = 1e-30;
+
+   return result;
+}
+
+/*
+ * Length of a tsvector for normalization purposes: the sum over all
+ * entries of POSDATALEN(), where an entry for which POSDATALEN() is 0
+ * counts as 1.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry   *entry;
+   WordEntry   *stop=(WordEntry*)STRPTR(t);
+   int total = 0;
+
+   for(entry=ARRPTR(t); entry < stop; entry++) {
+       int npos = POSDATALEN(t, entry);
+
+       total += ( npos == 0 ) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Compare a tsvector entry with a query item: by length first, and by
+ * strncmp() of the texts only when the lengths are equal.
+ *
+ * eval - base of the tsvector's text (ptr->pos is added to it)
+ * qval - base of the query's operand text (item->distance is added to it)
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary search in the entries of t for the one equal to the query
+ * item (as judged by WordECompareITEM).  Returns the entry, or NULL
+ * when there is none.
+ * NOTE(review): presumably the entries are kept sorted in
+ * WordECompareITEM order - confirm against the tsvector builder.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+   WordEntry  *lo = ARRPTR(t);
+   WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+   /* the wanted entry, if present, is always inside [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+       if (cmp == 0)
+           return mid;
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else
+           hi = mid;
+   }
+
+   return NULL;
+}
+
+/*
+ * Stand-in position list used for entries that carry no position data.
+ * Before use the first element is overwritten with the element count
+ * (lengthof(POSNULL)-1) through a uint16 pointer, so the array can be
+ * read like a stored position list: a count followed by positions.
+ * NOTE(review): the second element presumably encodes weight 0 at
+ * position MAXENTRYPOS-1 - confirm the field order of WordEntryPos.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank for queries whose top node is '&'.
+ *
+ * For every pair of distinct query values found in t, every pair of
+ * their positions contributes
+ *     sqrt( weight1 * weight2 * word_distance(distance) )
+ * and the contributions are accumulated as 1 - (1-res)*(1-contribution).
+ * Entries without position data use POSNULL; a distance of 0 is only
+ * counted when POSNULL is involved and is then taken as MAXENTRYPOS.
+ *
+ * w - weight table (the name is required by the wpos() macro)
+ * Returns a negative value when no pair contributed.
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* pos[i] stays NULL for items that are no values or are not in t */
+   memset(pos,0,sizeof(uint16*) * q->size);
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+
+       /* a position list is a uint16 count followed by the positions */
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       /* pair the new value with every value found earlier */
+       for( k=0; k<i; k++) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw; 
+                       if ( !dist ) dist=MAXENTRYPOS;  
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res; 
+}
+
+/*
+ * Rank for queries whose top node is not '&'.
+ *
+ * Every position of every query value found in t contributes its
+ * weight; contributions are accumulated as 1 - (1-res)*(1-weight).
+ * Entries without position data use POSNULL instead.
+ *
+ * w - weight table (the name is required by the wpos() macro)
+ * Returns a negative value when nothing contributed.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Compute the rank of tsvector t for query q.
+ *
+ * w      - weight table passed on to calc_rank_and()/calc_rank_or()
+ * method - normalization: 0 none, 1 divide by log(length),
+ *          2 divide by length (length as given by cnt_length())
+ *
+ * Returns 0 when t or q is empty; raises ERROR for an unknown method.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   /* nothing contributed: use a tiny positive rank */
+   if ( res < 0 )
+       res = 1e-20;
+
+   switch(method) {
+       case 0: break;
+       case 1:
+           {
+               int len = cnt_length(t);
+
+               /*
+                * log(1) is 0, so a document of length 1 is left
+                * unnormalized instead of being divided by zero.
+                */
+               if ( len > 1 )
+                   res /= log((float)len);
+           }
+           break;
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   return res;
+}
+
+/*
+ * rank(weights float4[], tsvector, query [, method int4])
+ *
+ * The weight array must be one-dimensional with at least
+ * lengthof(weights) elements.  A negative element selects the built-in
+ * default for that slot; an element above 1.0 raises ERROR.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 ) 
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+       elog(ERROR,"Array of weight is too short");
+
+   /* take over the caller's weights, falling back to the defaults */
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 ) 
+           elog(ERROR,"Weight out of range");
+   } 
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method); 
+       
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank(tsvector, query [, method int4]) using the built-in default
+ * weight table.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int method = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   float res = calc_rank(weights, txt, query, method);
+
+   PG_FREE_IF_COPY(txt, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * One occurrence of a query value in a document: which query item it
+ * is and at which position it occurs.  get_docrep() builds an array of
+ * these sorted by position.
+ */
+typedef struct {
+   ITEM    *item;  /* the query item that matched */
+   int32   pos;    /* position of the occurrence in the document */
+} DocRepresentation;
+
+/*
+ * qsort() comparator ordering DocRepresentation elements by ascending
+ * position.  Equal positions compare as equal (0), as the comparator
+ * contract of qsort() requires.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * A slice of a DocRepresentation array; passed as 'checkval' to
+ * checkcondition_DR().
+ */
+typedef struct {
+   DocRepresentation *doc; /* first element of the slice */
+   int len;                /* number of elements in the slice */
+}  ChkDocR;
+
+/*
+ * TS_execute() callback: a query item matches when it occurs anywhere
+ * in the ChkDocR slice passed as checkval.
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *slice = (ChkDocR*)checkval;
+   int idx;
+
+   for( idx=0; idx < slice->len; idx++ ) {
+       if ( slice->doc[idx].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next "cover" - a stretch of the document, given as the
+ * position-sorted array doc[0..len-1], on which the query is true.
+ *
+ * pos  - in/out: index in doc where the search starts; advanced so the
+ *        next call continues behind the start of the cover found
+ * p, q - out: first and last document position of the cover; *q also
+ *        serves as input to detect that a candidate was already seen
+ *
+ * Returns true when a cover was found.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /*
+    * Forward pass: for each query value take its first occurrence at
+    * or after *pos; *q becomes the largest of these positions.
+    */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               } 
+               break;
+           } 
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   } 
+
+   /*
+    * Backward pass: for each query value take its last occurrence
+    * between *pos and lastpos; *p becomes the smallest of these.
+    */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+ 
+   if ( *p<=*q ) {
+       /* check the query against exactly the elements f .. doc+lastpos */
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { 
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ 
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q); 
+   }
+ 
+   return false;
+}
+
+/*
+ * Build the position-sorted list of all occurrences of the query's
+ * values in txt.  Entries without position data are represented by
+ * POSNULL.
+ *
+ * doclen - out: number of elements in the result
+ *
+ * Returns a palloc'ed array, or NULL when no query value occurs in txt.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until the new positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+   
+   if ( cur>0 ) {
+       if ( cur>1 ) 
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+   
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd(K int4, tsvector, query [, method int4])
+ *
+ * Cover-based rank: every cover found by Cover() adds 1.0, or
+ * K/(cover length) when the cover is longer than K positions.
+ * K <= 0 selects the default of 4.  'method' normalizes as in rank():
+ * 0 none, 1 divide by log(length), 2 divide by length.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len,cur;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;    
+   while( Cover(doc, len, query, &cur, &p, &q) ) 
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   switch(method) {
+       case 0: break;
+       case 1:
+           {
+               int doclen = cnt_length(txt);
+
+               /*
+                * log(1) is 0, so a document of length 1 is left
+                * unnormalized instead of being divided by zero.
+                */
+               if ( doclen > 1 )
+                   res /= log((float)doclen);
+           }
+           break;
+       case 2: res /= (float)cnt_length(txt); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd(tsvector, query [, method int4]): calls rank_cd() with
+ * K = -1 (which makes it use its default) and DEF_NORM_METHOD when no
+ * method was given.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum method = ( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD);
+   Datum res = DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   );
+
+   PG_RETURN_DATUM(res);
+}
+
+/**************debug*************/
+
+/*
+ * One word occurrence of a document as used by get_covers().
+ */
+typedef struct {
+   char    *w;     /* word text inside the tsvector (not '\0'-terminated; see len) */
+   int2    len;    /* length of the word text */
+   int2    pos;    /* position of the occurrence */
+   int2    start;  /* number of the cover starting here, 0 if none */
+   int2    finish; /* number of the cover ending here, 0 if none */
+} DocWord;
+
+/*
+ * qsort() comparator ordering DocWord elements by ascending position.
+ * Equal positions compare as equal (0), as the comparator contract of
+ * qsort() requires.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * Debug function: get_covers(tsvector, query) returns the words of the
+ * document in position order with every cover marked as
+ * "{N word ... word }N", N being the running number of the cover.
+ *
+ * Raises ERROR "No pos info" when an entry of the tsvector has no
+ * position data.  Returns an empty text when no query value occurs in
+ * the document.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences of the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len sums up word length + separator */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark first and last word of every cover */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* test the bound first so dwptr is never read past the array */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/*
+ * Node of the pointer-based query tree built by maketree().
+ */
+typedef struct NODE
+{
+   struct NODE *left;          /* second operand of a binary operator, else NULL */
+   struct NODE *right;         /* operand stored right after the operator, else NULL */
+   ITEM       *valnode;        /* the query item this node stands for (not owned) */
+}  NODE;
+
+/*
+ * Build a pointer-based tree from the array ("plain") form of a query.
+ * For an operator the item right behind it becomes the right child
+ * and, unless the operator is '!', the item at in + in->left becomes
+ * the left child.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *result = (NODE *) palloc(sizeof(NODE));
+
+   result->valnode = in;
+   result->left = NULL;
+   result->right = NULL;
+
+   if (in->type != OPR)
+       return result;
+
+   result->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       result->left = maketree(in + in->left);
+
+   return result;
+}
+
+/*
+ * Growable ITEM array filled by plainnode().
+ */
+typedef struct
+{
+   ITEM       *ptr;            /* the array */
+   int4        len;            /* allocated number of items */
+   int4        cur;            /* number of items used so far */
+}  PLAINTREE;
+
+/*
+ * Append the subtree rooted at node to state's array in operator-first
+ * order, recomputing the 'left' offsets of operators, and free the
+ * tree nodes while doing so.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   int4        self;
+
+   /* make room for one more item */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+
+   /*
+    * Remember our own slot by index: the array may be moved by
+    * repalloc() during the recursive calls below.
+    */
+   self = state->cur;
+   memcpy((void *) &(state->ptr[self]), (void *) node->valnode, sizeof(ITEM));
+   state->cur++;
+
+   if (node->valnode->type != VAL)
+   {
+       if (node->valnode->val == (int4) '!')
+       {
+           state->ptr[self].left = 1;
+           plainnode(state, node->right);
+       }
+       else
+       {
+           plainnode(state, node->right);
+           state->ptr[self].left = state->cur - self;
+           plainnode(state, node->left);
+       }
+   }
+
+   pfree(node);
+}
+
+/*
+ * Convert a pointer-based tree back into its array ("plain") form.
+ * The tree is consumed.  *len receives the number of items; the
+ * result is NULL (and *len 0) when root is NULL or is neither a VAL
+ * nor an OPR node.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   pl;
+
+   pl.cur = 0;
+   pl.len = 16;
+   pl.ptr = NULL;
+
+   if (root != NULL &&
+       (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
+       plainnode(&pl, root);
+   }
+
+   *len = pl.cur;
+   return pl.ptr;
+}
+
+/*
+ * Free a tree and everything below it; a NULL tree is accepted.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive calls deal with NULL children themselves */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Remove every '!' sub-expression from the tree, treating '!' as
+ * always true.  This is useful for debugging; otherwise this view of
+ * the query is the one used for searching in an index.
+ *
+ * Returns the cleaned tree, or NULL when the whole (sub)tree reduces
+ * to "always true".  Nodes that drop out are freed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* '|' with an always-true operand is always true itself */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       /* '&': an always-true operand is dropped, the other one remains */
+       NODE       *res = node;
+
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a new plain query equal to ptr with all '!' sub-expressions
+ * removed (see clean_NOT_intree).  *len receives the number of items;
+ * the result is NULL when nothing remains.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree = maketree(ptr);
+   NODE       *pruned = clean_NOT_intree(tree);
+
+   return plaintree(pruned, len);
+}
+
+/* what is known about the value of a removed sub-expression */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Remove from the tree the values that are always present in a text
+ * (stop words, type VALTRUE) and simplify the operators above them.
+ *
+ * result - out: set to V_TRUE or V_FALSE when the (sub)tree reduces to
+ *          a constant; left untouched otherwise
+ *
+ * Returns the cleaned tree, or NULL when it reduced to a constant.
+ * Nodes that drop out are freed.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* negation of a constant is the opposite constant */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           /* "false | x" is x */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           /* "x | false" is x */
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       /* & operator */
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           /* "true & x" is x */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           /* "x & true" is x */
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a new plain query equal to ptr with all stop-word values
+ * removed (see clean_fakeval_intree).  *len receives the number of
+ * items.  When the whole query reduces to a constant a NOTICE is
+ * emitted, *len is set to 0 and NULL is returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &verdict);
+
+   if (verdict == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/*
+ * Query clean-up entry points.  Both take an ITEM array plus an int4
+ * out-parameter and return an ITEM array.
+ */
+
+/* NOTE(review): presumably rewrites/removes NOT operators - confirm in rewrite.c */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+
+/*
+ * Prunes fake values from the query; returns NULL and sets *len to 0 when
+ * the whole query collapses to a definite value.
+ */
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/*
+ * Comparator handed to qsort() and bsearch(): orders two SNMapEntry
+ * records by their key strings, using strcmp().
+ */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *lhs = (const SNMapEntry*)a;
+   const SNMapEntry *rhs = (const SNMapEntry*)b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add the pair (key, value) to the map.
+ *
+ * The entry array is grown with realloc() when it is full: 16 slots the
+ * first time, double the previous capacity afterwards.  The map stores its
+ * own strdup() copy of key.  After the insertion the whole array is
+ * re-sorted by key so that findSNMap() can use bsearch().  Any allocation
+ * failure is reported with elog(ERROR, "No memory").
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   /* no free slot left: enlarge the array first */
+   if ( map->reallen <= map->len ) {
+       int newcap = ( map->reallen ) ? map->reallen * 2 : 16;
+       SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newcap);
+
+       if ( grown == NULL )
+           elog(ERROR, "No memory");
+       map->list = grown;
+       map->reallen = newcap;
+   }
+
+   slot = &map->list[ map->len ];
+   slot->key = strdup(key);
+   if ( slot->key == NULL )
+       elog(ERROR, "No memory");
+   slot->value = value;
+   map->len++;
+
+   /* keep the entries ordered by key for the binary search */
+   if ( map->len > 1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Variant of addSNMap() taking the key as a text value: the key is
+ * converted with text2char(), inserted, and the temporary C string is
+ * released with pfree() afterwards.
+ */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey;
+
+   ckey = text2char( key );
+   addSNMap(map, ckey, value);
+   pfree(ckey);
+}
+
+/*
+ * Look up key in the map with a binary search over the entry array.
+ * Returns the stored value, or 0 when the map is empty or the key is not
+ * present.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if ( !map->list || map->len==0 )
+       return 0;
+
+   /* only the key of the probe entry is looked at by the comparator */
+   probe.key = key;
+   probe.value = 0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( hit == NULL )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Variant of findSNMap() taking the key as a text value.  The key is
+ * converted with text2char(), looked up, and the temporary C string is
+ * released with pfree().  Returns whatever findSNMap() returns for that
+ * key (0 when it is not found).
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* same type as the return value, no detour through int */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release everything owned by the map: each non-NULL key string, then the
+ * entry array itself.  The SNMap structure is zeroed afterwards, whether
+ * or not it held a list.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *cur;
+       int left;
+
+       for ( cur = map->list, left = map->len; left; cur++, left-- ) {
+           if ( cur->key ) free(cur->key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One key/value pair of the map. */
+typedef struct {
+   /* string key; addSNMap() stores a strdup() copy here */
+   char    *key;
+   /* Oid associated with the key */
+   Oid value;
+} SNMapEntry;
+
+/* String -> Oid map kept as an array of entries sorted by key. */
+typedef struct {
+   /* number of entries in use */
+   int len;
+   /* number of entries allocated in list */
+   int reallen;
+   /* entry array, grown with realloc() by addSNMap() */
+   SNMapEntry  *list;
+} SNMap;
+
+/* Insert a pair; the *_t variant takes the key as a text value. */
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+/* Look up a key; both return 0 when the key is not found. */
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+/* Free all keys and the entry array, then zero the SNMap. */
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate and initialise a stemmer environment.
+ *
+ * S_size, I_size and B_size give the number of string, integer and
+ * boolean variables to reserve.  An array is only allocated (and its
+ * *_size field only set) when the corresponding count is non-zero.
+ * Everything is zero-initialised through calloc(); z->p and each string
+ * variable are obtained from create_s().
+ *
+ * Returns the new environment, or NULL when one of the calloc() calls
+ * fails; in that case whatever had been allocated so far is released
+ * again through SN_close_env().  The results of create_s() are not
+ * checked here because its failure behaviour is not visible in this file.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    /* The *_size fields are still 0 for every array that was not set up,
+       so SN_close_env() frees exactly what exists at this point. */
+    SN_close_env(z);
+    return NULL;
+}
+
+/*
+ * Release an environment created by SN_create_env(): every string
+ * variable and the S array, the I and B arrays (each only when its size
+ * field is non-zero), the current string z->p, and finally the structure
+ * itself.  A NULL environment is accepted and ignored, like free(NULL).
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    if (z == NULL) return;
+    if (z->S_size)
+    {
+        {   int i;
+            for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
+        }
+        free(z->S);
+    }
+    if (z->I_size) free(z->I);
+    if (z->B_size) free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Load a new input string into the environment: replace_s() is asked to
+ * substitute the `size` symbols at `s` for the range 0 .. z->l of the
+ * current string, and the cursor z->c is reset to 0.
+ * NOTE(review): replace_s() is defined elsewhere; presumably it also
+ * adjusts z->l to the new length - confirm in utilities.c.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/* Run-time state of one stemmer instance.
+
+   p           the string being worked on (obtained from create_s())
+   c           cursor position within p
+   l, lb       forward limit and backward limit for the cursor
+   bra, ket    the two ends of the slice that slice operations act on
+   a           NOTE(review): not referenced by the code shown alongside
+               this header - confirm its use
+   S, I, B     arrays of string, integer and boolean variables holding
+               S_size, I_size and B_size elements respectively
+*/
+struct SN_env {
+    symbol * p;
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;
+    symbol * * S;
+    int * I;
+    symbol * B;
+};
+
+/* Allocate an environment with the given numbers of string, integer and
+   boolean variables, and release it again. */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+/* Replace the current string by the `size` symbols at `s` and reset the
+   cursor to 0. */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/*
+ * Prelude.  Clears the Y_found flag (B[0]), then rewrites 'y' as 'Y' in
+ * two situations, setting Y_found each time it does so:
+ *   - a 'y' at the starting cursor position that is followed by a
+ *     character of grouping g_v;
+ *   - every later 'y' that directly follows a character of grouping g_v.
+ * The cursor is restored afterwards and the routine always returns 1.
+ * NOTE(review): g_v is presumably the vowel grouping - confirm against the
+ * Snowball source of the English stemmer.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Computes the two region marks I[0] (p1) and I[1] (p2).  Both start out
+ * at the limit z->l.  p1 is set either right after a match against table
+ * a_0 ("gener") or after scanning past a g_v character and then past a
+ * non-g_v character; p2 is set after scanning past one more such pair.
+ * If the input runs out during a scan, the marks not yet set keep the
+ * value z->l.  The cursor is restored and the routine always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Backward test used by Step_1b and Step_5.  Returns 1 when, reading
+ * backwards from the cursor, one of two patterns matches:
+ *   - a character outside g_v_WXY, then one inside g_v, then one outside
+ *     g_v; or
+ *   - a character outside g_v, then one inside g_v, with the cursor then
+ *     at the backward limit z->lb.
+ * Returns 0 otherwise.
+ */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* R1 test: returns 1 when the cursor is at or beyond mark I[0] (p1), else 0. */
+static int r_R1(struct SN_env * z) {
+    return (z->I[0] <= z->c) ? 1 : 0;
+}
+
+/* R2 test: returns 1 when the cursor is at or beyond mark I[1] (p2), else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/*
+ * Step 1a.  Looks backwards for one of the suffixes in table a_1
+ * (ied, s, ies, sses, ss, us) and acts on the table result:
+ *   1 (sses)      replace the suffix by "ss";
+ *   2 (ied, ies)  replace by "ie" when exactly one character precedes the
+ *                 suffix, otherwise by "i";
+ *   3 (s)         delete the suffix, provided a g_v character is found
+ *                 somewhere before the character preceding it.
+ * Entries whose result is -1 (ss, us) leave the word unchanged.
+ * Returns 0 when no suffix matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1b.  Looks backwards for one of the suffixes in table a_3
+ * (ed, eed, ing, edly, eedly, ingly):
+ *   1 (eed, eedly)  if the suffix lies in R1, replace it by "ee";
+ *   2 (the others)  if a g_v character occurs before the suffix, delete
+ *                   it, then inspect the new ending through table a_2:
+ *                     1 (bl, at, iz)      append "e";
+ *                     2 (doubled letter)  delete the last character;
+ *                     3 (anything else)   append "e" when the cursor is
+ *                                         exactly at mark I[0] and
+ *                                         r_shortv() succeeds.
+ * Returns 0 when nothing matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1c.  Replaces a final 'y' or 'Y' by 'i' when the character before
+ * it is outside grouping g_v and that character is not the first one of
+ * the word (the cursor must not have reached the backward limit z->lb).
+ * Returns 1 when the replacement was made, 0 otherwise.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/*
+ * Step 2.  Looks backwards for one of the suffixes in table a_4; the
+ * suffix must lie in R1.  The table result selects the replacement string
+ * (s_13 .. s_28) written over the suffix.  Two results carry an extra
+ * condition: 13 (ogi) additionally requires a preceding 'l', and 16 (li)
+ * deletes the suffix only when the preceding character belongs to grouping
+ * g_valid_LI.
+ * Returns 0 when nothing matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 3.  Looks backwards for one of the suffixes in table a_5; the
+ * suffix must lie in R1.  Results 1-4 replace the suffix by "tion",
+ * "ate", "al" or "ic"; result 5 (ful, ness) deletes it; result 6 (ative)
+ * deletes it only when it also lies in R2.
+ * Returns 0 when nothing matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 4.  Looks backwards for one of the suffixes in table a_6; the
+ * suffix must lie in R2.  Result 1 deletes the suffix; result 2 (ion)
+ * deletes it only when it is preceded by 's' or 't'.
+ * Returns 0 when nothing matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 5.  Looks backwards for a final 'e' or 'l' (table a_7):
+ *   1 ('e')  delete it when it lies in R2, or when it lies in R1 and
+ *            r_shortv() fails at that position;
+ *   2 ('l')  delete it when it lies in R2 and is preceded by another 'l'.
+ * Returns 0 when nothing matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Second exception list.  Returns 1 when a backward match against table
+ * a_8 (succeed, proceed, exceed, canning, inning, earring, herring,
+ * outing) ends exactly at the backward limit z->lb, i.e. the match covers
+ * everything back to that limit; returns 0 otherwise.  The word itself is
+ * not modified.
+ */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (!(find_among_b(z, a_8, 8))) return 0; /* substring, line 147 */
+    z->bra = z->c; /* ], line 147 */
+    if (z->c > z->lb) return 0; /* atlimit, line 147 */
+    return 1;
+}
+
+/*
+ * First exception list.  Matches the input forwards against table a_9 and
+ * requires the match to end at the limit z->l, i.e. to cover the rest of
+ * the input.  Results 1-11 overwrite the word with a fixed stem (s_36 ..
+ * s_46, e.g. "ski", "sky", "die", "lie", "tie"); entries whose result is
+ * -1 are accepted without any change.
+ * Returns 1 on a match, 0 otherwise.
+ */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Postlude.  Does nothing and returns 0 unless the Y_found flag (B[0]) is
+ * set.  Otherwise it scans forward from the cursor and turns every 'Y'
+ * back into 'y', then returns 1.
+ */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+/*
+ * Entry point of the English stemmer; stems the word held in z in place.
+ *
+ * Order of operations:
+ *   1. r_exception1(): if the whole word is in the first exception list,
+ *      stemming is finished.
+ *   2. Otherwise the word must offer at least three characters from the
+ *      cursor on (the "hop 3" test); if not, the routine returns 0 and the
+ *      word is left as it is.
+ *   3. r_prelude() and r_mark_regions() run forwards, each with the cursor
+ *      restored afterwards.
+ *   4. Processing switches to backward mode (lb = c, c = l) and
+ *      r_Step_1a() runs.
+ *   5. Unless r_exception2() matches, Steps 1b, 1c, 2, 3, 4 and 5 run in
+ *      turn; a failing step is simply skipped and the cursor restored.
+ *   6. Back in forward mode r_postlude() runs.
+ * Returns 1 in every case except the early return of point 2.
+ */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/* Creates an environment sized for this stemmer: no string variables,
+   two integers (the marks I[0] and I[1]) and one boolean (Y_found). */
+extern struct SN_env * english_create_env(void) { return SN_create_env(0, 2, 1); }
+
+/* Releases an environment obtained from english_create_env(). */
+extern void english_close_env(struct SN_env * z) { SN_close_env(z); }
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * english_create_env(void); /* returns the environment produced by SN_create_env(0, 2, 1) */
+extern void english_close_env(struct SN_env * z); /* passes z to SN_close_env */
+
+extern int english_stem(struct SN_env * z); /* runs the English stemmer on z; the normal exit path returns 1 */
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+/* INT_MAX / INT_MIN used below come from <limits.h>. */
+#include <limits.h>
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Bytes reserved in front of every symbol buffer: two ints, read back by
+   CAPACITY (index -2) and SIZE (index -1).  Parenthesized so the macro
+   behaves as a single operand wherever it is expanded. */
+#define HEAD (2*sizeof(int))
+
+/* SIZE and CAPACITY stay usable as lvalues; SET_SIZE is a complete
+   parenthesized assignment so an expression argument cannot re-associate. */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    ((int *)(p))[-2]
+
+struct among /* one row of a lookup table searched by find_among / find_among_b */
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index of the next row to try when this one is rejected; a negative value ends the chain */
+    int result;     /* value returned by the lookup when this row is accepted */
+    int (* function)(struct SN_env *); /* optional extra test: when non-null the row is accepted only if it returns non-zero */
+};
+
+extern symbol * create_s(void); /* allocate a fresh symbol buffer with its capacity/size header */
+extern void lose_s(symbol * p); /* free a buffer obtained from create_s or increase_size */
+
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max); /* forward: consume the symbol at the cursor if its bit is set in bitmap s (bit 0 = min) */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max); /* backward variant: tests the symbol before the cursor, bounded by z->lb */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max); /* forward: consume the symbol at the cursor if it is NOT in the grouping */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max); /* backward variant of out_grouping */
+
+extern int in_range(struct SN_env * z, int min, int max); /* forward: consume the symbol at the cursor if min <= symbol <= max */
+extern int in_range_b(struct SN_env * z, int min, int max); /* backward variant of in_range */
+extern int out_range(struct SN_env * z, int min, int max); /* forward: consume the symbol at the cursor if it lies outside min..max */
+extern int out_range_b(struct SN_env * z, int min, int max); /* backward variant of out_range */
+
+extern int eq_s(struct SN_env * z, int s_size, symbol * s); /* forward: consume s_size symbols if they equal s */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s); /* backward: consume the s_size symbols before the cursor if they equal s */
+extern int eq_v(struct SN_env * z, symbol * p); /* eq_s with the length taken from SIZE(p) */
+extern int eq_v_b(struct SN_env * z, symbol * p); /* eq_s_b with the length taken from SIZE(p) */
+
+extern int find_among(struct SN_env * z, struct among * v, int v_size); /* forward table lookup; returns the accepted row's result or 0 */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size); /* backward table lookup; returns the accepted row's result or 0 */
+
+extern symbol * increase_size(symbol * p, int n); /* move p's contents into a larger buffer (capacity n + 20) and free p */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s); /* replace z->p[c_bra..c_ket) by s; returns the change in length */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s); /* replace the slice z->bra..z->ket by s */
+extern void slice_from_v(struct SN_env * z, symbol * p); /* slice_from_s with the length taken from SIZE(p) */
+extern void slice_del(struct SN_env * z); /* delete the slice z->bra..z->ket */
+
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s); /* replace_s, then shift z->bra / z->ket that lie at or after bra */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p); /* insert_s with the length taken from SIZE(p) */
+
+extern symbol * slice_to(struct SN_env * z, symbol * p); /* copy the current slice into p (grown if needed); returns the possibly new p */
+extern symbol * assign_to(struct SN_env * z, symbol * p); /* copy z->p[0..z->l) into p (grown if needed); returns the possibly new p */
+
+extern void debug(struct SN_env * z, int number, int line_count); /* print the buffer with cursor/slice/limit markers */
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int russian_stem(struct SN_env * z);
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+static symbol s_0_0[3] = { 215, 219, 201 }; /* s_0_*: ending strings referenced by a_0 (byte values are presumably KOI8-R Cyrillic - TODO confirm) */
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] = /* searched backwards by r_perfective_gerund; each row is { s_size, s, substring_i, result, function } */
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+static symbol s_1_0[2] = { 192, 192 }; /* s_1_*: ending strings referenced by a_1 */
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] = /* searched backwards by r_adjective; every row has result 1 and no substring link */
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+static symbol s_2_0[2] = { 197, 205 }; /* s_2_*: ending strings referenced by a_2 */
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] = /* searched backwards by r_adjectival after an adjective ending was removed */
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+static symbol s_3_0[2] = { 211, 209 }; /* s_3_*: ending strings referenced by a_3 */
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] = /* searched backwards by r_reflexive */
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+static symbol s_4_0[1] = { 192 }; /* s_4_*: ending strings referenced by a_4 */
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] = /* searched backwards by r_verb; result 1 rows need a preceding 193 or 209, result 2 rows are deleted outright */
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+static symbol s_5_0[1] = { 192 }; /* s_5_*: ending strings referenced by a_5 */
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] = /* searched backwards by r_noun; every row has result 1 */
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+static symbol s_6_0[3] = { 207, 211, 212 }; /* s_6_*: ending strings referenced by a_6 */
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] = /* searched backwards by r_derivational */
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+static symbol s_7_0[4] = { 197, 202, 219, 197 }; /* s_7_*: ending strings referenced by a_7 */
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] = /* searched backwards by r_tidy_up; results 1, 2 and 3 select its three switch cases */
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+static unsigned char g_v[] = { 35, 130, 34, 18 }; /* grouping bitmap used with min 192, max 220: bits set for 192,193,197,201,207,209,213,217,220 (presumably the vowels - confirm) */
+
+static symbol s_0[] = { 193 }; /* s_0 / s_1: alternatives tested by r_perfective_gerund */
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 }; /* s_2 / s_3: alternatives tested by r_adjectival */
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 }; /* s_4 / s_5: alternatives tested by r_verb */
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 }; /* s_6, s_7, s_8: single symbol 206 tested by r_tidy_up */
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 }; /* trailing symbol removed by russian_stem (try, line 248) */
+
+static int r_mark_regions(struct SN_env * z) { /* scans forward to set marks pV (I[0]) and p2 (I[1]); cursor is restored; always returns 1 */
+    z->I[0] = z->l; /* both marks default to the end limit in case a scan below runs off the end */
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 100 */
+        while(1) { /* gopast, line 101 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+            break; /* a g_v symbol was consumed: cursor now sits just past it */
+        lab1:
+            if (z->c >= z->l) goto lab0; /* reached the limit: abandon, leaving the remaining marks at their defaults */
+            z->c++;
+        }
+        z->I[0] = z->c; /* setmark pV, line 101 */
+        while(1) { /* gopast, line 101 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+            break; /* a symbol outside g_v was consumed */
+        lab2:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+            break;
+        lab3:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+            break;
+        lab4:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 102 */
+    lab0:
+        z->c = c; /* restore the cursor saved on entry to the block */
+    }
+    return 1;
+}
+
+/* Region test: succeeds (1) when the cursor is at or beyond the p2 mark
+   kept in z->I[1], fails (0) otherwise. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+static int r_perfective_gerund(struct SN_env * z) { /* removes an a_0 ending that ends at the cursor; returns 0 when no ending qualifies, otherwise 1 */
+    int among_var;
+    z->ket = z->c; /* [, line 111 */
+    among_var = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 111 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1: /* result 1: the ending only counts when preceded by symbol 193 (s_0) or 209 (s_1) */
+            {   int m = z->l - z->c; /* or, line 115 */
+                if (!(eq_s_b(z, 1, s_0))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m; /* rewind the cursor before trying the second alternative */
+                if (!(eq_s_b(z, 1, s_1))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 115 */
+            break;
+        case 2: /* result 2: the ending is removed unconditionally */
+            slice_del(z); /* delete, line 122 */
+            break;
+    }
+    return 1;
+}
+
+/* Remove an ending from table a_1 that ends at the cursor.
+   Returns 0 when no row of a_1 matches, otherwise 1. */
+static int r_adjective(struct SN_env * z) {
+    int matched;
+
+    z->ket = z->c;                        /* slice ends at the cursor */
+    matched = find_among_b(z, a_1, 26);
+    if (matched == 0) return 0;
+    z->bra = z->c;                        /* slice starts where the match began */
+    if (matched == 1) slice_del(z);
+    return 1;
+}
+
+static int r_adjectival(struct SN_env * z) { /* requires r_adjective to succeed, then optionally removes an a_2 ending; returns 0 only when r_adjective fails */
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m; goto lab0; }
+            case 1: /* result 1: the ending only counts when preceded by symbol 193 (s_2) or 209 (s_3) */
+                {   int m = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m;
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m; goto lab0; } /* NOTE(review): this m is the inner "or" m, not the outer "try" m - confirm that is the intended restore point */
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2: /* result 2: the ending is removed unconditionally */
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Remove an ending from table a_3 that ends at the cursor.
+   Returns 0 when no row of a_3 matches, otherwise 1. */
+static int r_reflexive(struct SN_env * z) {
+    int matched;
+
+    z->ket = z->c;                        /* slice ends at the cursor */
+    matched = find_among_b(z, a_3, 2);
+    if (matched == 0) return 0;
+    z->bra = z->c;                        /* slice starts where the match began */
+    if (matched == 1) slice_del(z);
+    return 1;
+}
+
+static int r_verb(struct SN_env * z) { /* removes an a_4 ending that ends at the cursor; returns 0 when no ending qualifies, otherwise 1 */
+    int among_var;
+    z->ket = z->c; /* [, line 176 */
+    among_var = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 176 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1: /* result 1: the ending only counts when preceded by symbol 193 (s_4) or 209 (s_5) */
+            {   int m = z->l - z->c; /* or, line 182 */
+                if (!(eq_s_b(z, 1, s_4))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m; /* rewind the cursor before trying the second alternative */
+                if (!(eq_s_b(z, 1, s_5))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 182 */
+            break;
+        case 2: /* result 2: the ending is removed unconditionally */
+            slice_del(z); /* delete, line 190 */
+            break;
+    }
+    return 1;
+}
+
+/* Remove an ending from table a_5 that ends at the cursor.
+   Returns 0 when no row of a_5 matches, otherwise 1. */
+static int r_noun(struct SN_env * z) {
+    int matched;
+
+    z->ket = z->c;                        /* slice ends at the cursor */
+    matched = find_among_b(z, a_5, 36);
+    if (matched == 0) return 0;
+    z->bra = z->c;                        /* slice starts where the match began */
+    if (matched == 1) slice_del(z);
+    return 1;
+}
+
+/* Remove an ending from table a_6 that ends at the cursor, provided the
+   start of the ending lies in region R2.
+   Returns 0 when nothing matches or the R2 test fails, otherwise 1. */
+static int r_derivational(struct SN_env * z) {
+    int matched;
+
+    z->ket = z->c;                        /* slice ends at the cursor */
+    matched = find_among_b(z, a_6, 2);
+    if (matched == 0) return 0;
+    z->bra = z->c;                        /* slice starts where the match began */
+    if (!r_R2(z)) return 0;               /* ending must start at or after p2 */
+    if (matched == 1) slice_del(z);
+    return 1;
+}
+
+static int r_tidy_up(struct SN_env * z) { /* final clean-up driven by table a_7; returns 0 when no row matches or a follow-up test fails, otherwise 1 */
+    int among_var;
+    z->ket = z->c; /* [, line 223 */
+    among_var = find_among_b(z, a_7, 4); /* substring, line 223 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 223 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1: /* rows 0 and 3: drop the ending, then reduce a doubled 206 in front of it to a single one */
+            slice_del(z); /* delete, line 227 */
+            z->ket = z->c; /* [, line 228 */
+            if (!(eq_s_b(z, 1, s_6))) return 0;
+            z->bra = z->c; /* ], line 228 */
+            if (!(eq_s_b(z, 1, s_7))) return 0; /* a second 206 must precede; it is tested but stays outside the slice */
+            slice_del(z); /* delete, line 228 */
+            break;
+        case 2: /* row 1 (a single 206): delete it only when another 206 precedes it */
+            if (!(eq_s_b(z, 1, s_8))) return 0;
+            slice_del(z); /* delete, line 231 */
+            break;
+        case 3: /* row 2 (symbol 216): delete it unconditionally */
+            slice_del(z); /* delete, line 233 */
+            break;
+    }
+    return 1;
+}
+
+extern int russian_stem(struct SN_env * z) { /* entry point: stems the word held in z by deleting endings from its buffer; returns 1, or 0 if the cursor is before mark pV */
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3; /* holds the previous backward limit so it can be restored below */
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        m3 = z->lb; z->lb = z->c; /* backward limit becomes mark pV, so the routines below cannot reach in front of it */
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            z->c = z->l - m; /* end of "do": cursor returns to its offset from the end whether or not anything was removed */
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3; /* restore the backward limit saved above */
+    }
+    z->c = z->lb; /* leave the cursor at the backward limit */
+    return 1;
+}
+
+extern struct SN_env * russian_create_env(void) { return SN_create_env(0, 2, 0); } /* new environment from SN_create_env(0, 2, 0); the 2 presumably sizes the I[] array used for marks I[0] and I[1] - confirm against api.h */
+
+extern void russian_close_env(struct SN_env * z) { SN_close_env(z); } /* releases z by handing it to SN_close_env */
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * russian_create_env(void); /* returns the environment produced by SN_create_env(0, 2, 0) */
+extern void russian_close_env(struct SN_env * z); /* passes z to SN_close_env */
+
+extern int russian_stem(struct SN_env * z); /* stems the word held in z; returns 1, or 0 if the cursor is before mark pV */
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>   /* printf, fprintf, stderr */
+#include <stdlib.h>  /* malloc, free, exit */
+#include <string.h>  /* memcmp, memmove */
+
+#include "header.h"
+
+/* unless(C) reads as "if not C". */
+#define unless(C) if(!(C))
+
+/* Capacity and initial size, in symbols, of a buffer made by create_s. */
+#define CREATE_SIZE 1
+
+/* Allocate a symbol buffer of CREATE_SIZE symbols.  The returned pointer
+   addresses the first symbol; HEAD bytes in front of it hold the capacity
+   and the size.  Returns NULL when memory cannot be obtained (previously
+   the unchecked malloc result was offset and written through). */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL) return NULL;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Free a buffer obtained from create_s or increase_size.  A NULL pointer is
+   ignored, so a failed allocation can be passed here safely. */
+extern void lose_s(symbol * p)
+{   if (p == NULL) return;
+    free((char *) p - HEAD); /* step back over the header to the address malloc returned */
+}
+
+/* Forward grouping test.  If the symbol at the cursor lies in min..max and
+   its bit (numbered from min) is set in bitmap s, advance the cursor by one
+   and return 1; otherwise return 0 with the cursor unchanged. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int bit;
+
+    if (z->c >= z->l) return 0;           /* nothing left before the limit */
+    bit = z->p[z->c];
+    if (bit > max) return 0;
+    bit -= min;                           /* index into the bitmap */
+    if (bit < 0) return 0;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward grouping test.  If the symbol just before the cursor lies in
+   min..max and its bit (numbered from min) is set in bitmap s, move the
+   cursor back by one and return 1; otherwise return 0. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int bit;
+
+    if (z->c <= z->lb) return 0;          /* already at the backward limit */
+    bit = z->p[z->c - 1];
+    if (bit > max) return 0;
+    bit -= min;                           /* index into the bitmap */
+    if (bit < 0) return 0;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward complement of in_grouping.  If the symbol at the cursor is NOT in
+   the grouping (out of min..max, or its bit is clear in s), advance the
+   cursor by one and return 1; otherwise return 0. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int bit;
+
+    if (z->c >= z->l) return 0;           /* nothing left before the limit */
+    bit = z->p[z->c];
+    if (bit <= max) {
+        bit -= min;                       /* index into the bitmap */
+        if (bit >= 0 && (s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward complement of in_grouping_b.  If the symbol just before the
+   cursor is NOT in the grouping, move the cursor back by one and return 1;
+   otherwise return 0. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int bit;
+
+    if (z->c <= z->lb) return 0;          /* already at the backward limit */
+    bit = z->p[z->c - 1];
+    if (bit <= max) {
+        bit -= min;                       /* index into the bitmap */
+        if (bit >= 0 && (s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward range test: consume the symbol at the cursor and return 1 when
+   min <= symbol <= max; otherwise return 0 with the cursor unchanged. */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int sym;
+
+    if (z->c >= z->l) return 0;
+    sym = z->p[z->c];
+    if (!(sym <= max && sym >= min)) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward range test: step back over the symbol before the cursor and
+   return 1 when min <= symbol <= max; otherwise return 0. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int sym;
+
+    if (z->c <= z->lb) return 0;
+    sym = z->p[z->c - 1];
+    if (!(sym <= max && sym >= min)) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward out-of-range test: consume the symbol at the cursor and return 1
+   when it lies outside min..max; otherwise return 0. */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int sym;
+
+    if (z->c >= z->l) return 0;
+    sym = z->p[z->c];
+    if (sym <= max && sym >= min) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward out-of-range test: step back over the symbol before the cursor
+   and return 1 when it lies outside min..max; otherwise return 0. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int sym;
+
+    if (z->c <= z->lb) return 0;
+    sym = z->p[z->c - 1];
+    if (sym <= max && sym >= min) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward literal match: when at least s_size symbols remain before the
+   limit and they equal s, advance the cursor past them and return 1;
+   otherwise return 0 with the cursor unchanged. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    int remaining = z->l - z->c;
+
+    if (remaining < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward literal match: when at least s_size symbols lie between the
+   backward limit and the cursor and the last s_size of them equal s, move
+   the cursor back over them and return 1; otherwise return 0. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    int behind = z->c - z->lb;
+
+    if (behind < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward match against a sized buffer: eq_s with the length read from
+   the header of p. */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int length = SIZE(p);
+    return eq_s(z, length, p);
+}
+
+/* Backward match against a sized buffer: eq_s_b with the length read from
+   the header of p. */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int length = SIZE(p);
+    return eq_s_b(z, length, p);
+}
+
+extern int find_among(struct SN_env * z, struct among * v, int v_size) /* looks up the text at the cursor in table v (v_size rows); returns the accepted row's result, or 0 if none */
+{
+    int i = 0; /* lower row index of the binary search */
+    int j = v_size; /* upper row index (exclusive) */
+
+    int c = z->c; int l = z->l; /* cursor and forward limit on entry */
+    symbol * q = z->p + c; /* text beginning at the cursor */
+
+    struct among * w;
+
+    int common_i = 0; /* symbols already known to agree with row i */
+    int common_j = 0; /* symbols already known to agree with row j */
+
+    int first_key_inspected = 0;
+
+    while(1) /* binary search (so v is presumably sorted by s - confirm with the table generator) */
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {   int i; for (i = common; i < w->s_size; i++) /* this inner i shadows the outer row index */
+            {   if (c + common == l) { diff = -1; break; } /* text ran out at the limit: treat it as sorting before the key */
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1) /* walk from row i along the substring_i links until a row is accepted */
+    {   w = v + i;
+        if (common_i >= w->s_size) /* the whole of this row's string matched */
+        {   z->c = c + w->s_size; /* cursor moves past the matched string */
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c + w->s_size; /* reset the cursor in case the function moved it */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0; /* no shorter candidate left */
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size) /* looks up the text ending at the cursor in table v; returns the accepted row's result, or 0 if none */
+{
+    int i = 0; /* lower row index of the binary search */
+    int j = v_size; /* upper row index (exclusive) */
+
+    int c = z->c; int lb = z->lb; /* cursor and backward limit on entry */
+    symbol * q = z->p + c - 1; /* last symbol before the cursor; the comparison walks leftwards from here */
+
+    struct among * w;
+
+    int common_i = 0; /* symbols (counted from the end) known to agree with row i */
+    int common_j = 0; /* symbols (counted from the end) known to agree with row j */
+
+    int first_key_inspected = 0;
+
+    while(1) /* binary search comparing strings from their last symbol backwards */
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--) /* this inner i shadows the outer row index */
+            {   if (c - common == lb) { diff = -1; break; } /* text ran out at the backward limit */
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            if (first_key_inspected) break; /* one extra pass so that row 0 gets compared */
+            first_key_inspected = 1;
+        }
+    }
+    while(1) /* walk from row i along the substring_i links until a row is accepted */
+    {   w = v + i;
+        if (common_i >= w->s_size) /* the whole of this row's string matched */
+        {   z->c = c - w->s_size; /* cursor moves to the start of the matched string */
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c - w->s_size; /* reset the cursor in case the function moved it */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0; /* no shorter candidate left */
+    }
+}
+
+
+/* Move the contents of p into a new buffer with room for n + 20 symbols,
+   free p and return the new buffer.  The size header is carried over so the
+   result is valid even before the caller sets a new size.
+   An allocation failure is fatal, reported in the same way as slice_check:
+   the callers in this file use the result without testing it, so there is
+   no way to hand the failure back to them. */
+extern symbol * increase_size(symbol * p, int n)
+{   int new_size = n + 20;
+    void * mem = malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    symbol * q;
+    if (mem == NULL)
+    {
+        fprintf(stderr, "out of memory in increase_size\n");
+        exit(1);
+    }
+    q = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(q) = new_size;
+    SET_SIZE(q, SIZE(p));
+    memmove(q, p, CAPACITY(p) * sizeof(symbol)); lose_s(p); return q;
+}
+
+/* Replace the symbols of z->p in c_bra..c_ket (c_ket exclusive) by the
+   s_size symbols at s.  The buffer is grown when needed, the tail is
+   shifted, and z->l and z->c are adjusted.  Returns the change in length
+   (s_size minus the number of symbols replaced).
+*/
+
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{
+    int delta = s_size - (c_ket - c_bra);
+    int old_len = SIZE(z->p);
+
+    if (delta != 0)
+    {
+        int new_len = old_len + delta;
+        if (new_len > CAPACITY(z->p)) z->p = increase_size(z->p, new_len);
+        /* shift everything from c_ket onwards to its new position */
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        if (z->c >= c_ket)
+            z->c += delta;                /* cursor was after the slice: move with the tail */
+        else if (z->c > c_bra)
+            z->c = c_bra;                 /* cursor was inside the slice: pin to its start */
+    }
+    if (s_size != 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/* Verify 0 <= bra <= ket <= l <= SIZE(p).  On violation, report on stderr,
+   dump the environment with debug() and terminate the process. */
+static void slice_check(struct SN_env * z)
+{
+    if (z->bra < 0 ||
+        z->bra > z->ket ||
+        z->ket > z->l ||
+        z->l > SIZE(z->p))   /* this last test could be removed */
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s) /* replaces the slice z->bra..z->ket by the s_size symbols at s */
+{   slice_check(z); /* terminates the process if the slice bounds are inconsistent */
+    replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+extern void slice_from_v(struct SN_env * z, symbol * p) /* replaces the slice by the contents of sized buffer p */
+{   slice_from_s(z, SIZE(p), p);
+}
+
+extern void slice_del(struct SN_env * z) /* deletes the slice z->bra..z->ket */
+{   slice_from_s(z, 0, 0); /* zero-length replacement; replace_s never reads s when s_size is 0 */
+}
+
+/* Replace z->p[bra..ket) by the s_size symbols at s, then move the slice
+   markers z->bra and z->ket by the change in length when they lie at or
+   after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int shift = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += shift;
+    if (z->ket >= bra) z->ket += shift;
+}
+
+/* Same as insert_s, with the inserted text and its length taken from the
+   sized buffer p. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    int shift = replace_s(z, bra, ket, SIZE(p), p);
+
+    if (z->bra >= bra) z->bra += shift;
+    if (z->ket >= bra) z->ket += shift;
+}
+
+/* Copy the current slice z->bra..z->ket into p, growing p first when its
+   capacity is too small, and set p's size to the slice length.  Returns p,
+   which may be a new buffer. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int count;
+
+    slice_check(z);
+    count = z->ket - z->bra;
+    if (CAPACITY(p) < count) p = increase_size(p, count);
+    memmove(p, z->p + z->bra, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into p, growing p first when its
+   capacity is too small, and set p's size to z->l.  Returns p, which may be
+   a new buffer. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int count = z->l;
+
+    if (CAPACITY(p) < count) p = increase_size(p, count);
+    memmove(p, z->p, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/* Print the buffer of z on stdout between single quotes, with markers at
+   the relevant positions: '{' backward limit, '[' bra, '|' cursor, ']' ket,
+   '}' forward limit.  Zero symbols are shown as '#'.  When number >= 0 the
+   line is prefixed with number, line_count and the buffer size. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{   int i;
+    int limit = SIZE(z->p);
+    if (number >= 0) printf("%3d (line %4d): [%d]", number, line_count,limit);
+    printf("'"); /* always printed, so the closing quote below is never left unmatched */
+    for (i = 0; i <= limit; i++) /* <= so that markers sitting at the very end are still printed */
+    {   if (z->lb == i) printf("{");
+        if (z->bra == i) printf("[");
+        if (z->c == i) printf("|");
+        if (z->ket == i) printf("]");
+        if (z->l == i) printf("}");
+        if (i < limit)
+        {   int ch = z->p[i];
+            if (ch == 0) ch = '#';
+            printf("%c", ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case a NUL-terminated string in place, byte by byte, using
+ * tolower() on each byte taken as unsigned char.  Returns str itself.
+ */
+char*
+lowerstr(char *str) {
+   char *cur;
+
+   for(cur=str; *cur; cur++)
+       *cur = tolower(*(unsigned char*)cur);
+   return str;
+}
+
+/*
+ * Release a stop-word list: free up to s->len strings (stopping early at a
+ * NULL entry), free the pointer array once, then zero the whole StopList
+ * structure (memset clears every field of *s).
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       /*
+        * Check the counter before dereferencing: readstoplist() does not
+        * NULL-terminate the array, so *ptr must not be read past s->len.
+        */
+       while( s->len >0 && *ptr ) {
+           free(*ptr);
+           ptr++; s->len--;
+       }
+       /* the array itself is released exactly once, after the loop */
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/* Free the first len strings of a partially built stop array and the
+ * array itself; a NULL array is accepted. */
+static void
+freestoparray(char **stop, int len) {
+   int i;
+
+   if ( !stop )
+       return;
+   for(i=0;i<len;i++)
+       free(stop[i]);
+   free(stop);
+}
+
+/*
+ * Load a stop-word list from the file named by the text value in.
+ *
+ * Each non-empty line becomes one malloc()ed entry of s->stop; when
+ * s->wordop is set every word is passed through it before being stored.
+ * s->len receives the number of words.  With a NULL or empty file name the
+ * list is left empty (s->stop = NULL, s->len = 0).
+ *
+ * Raises elog(ERROR) when the file cannot be opened or memory runs out; in
+ * the out-of-memory case everything read so far is released first and the
+ * list is left empty.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t  buflen=strlen(buf);
+
+           /*
+            * Strip the newline only when one is present: the last line of a
+            * file may lack it, and blindly cutting the final byte would
+            * truncate that word.
+            */
+           if ( buflen>0 && buf[buflen-1]=='\n' )
+               buf[buflen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /* a failed realloc leaves the old array valid: free it */
+                   freestoparray(stop, s->len);
+                   s->stop=NULL;
+                   s->len=0;
+                   fclose(hin); 
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+    
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               freestoparray(stop, s->len);
+               s->stop=NULL;
+               s->len=0;
+               fclose(hin); 
+               elog(ERROR,"Not enough memory");
+           }
+           if ( s->wordop ) 
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++; 
+       }
+       fclose(hin);
+       pfree(filename); 
+   }
+   s->stop=stop;
+} 
+
+/* qsort()/bsearch() callback: a and b each point to a char* element;
+ * the pointed-to strings are ordered with strcmp(). */
+static int
+comparestr(const void *a, const void *b) {
+   const char *left = *(char**)a;
+   const char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/* Sort the stop words with qsort() using comparestr (strcmp order).
+ * Does nothing for an empty list. */
+void
+sortstoplist(StopList *s) {
+   if (s->stop && s->len>0)
+       qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is found in the stop list, false otherwise
+ * (including for an empty list).
+ *
+ * When s->wordop is set, key is first passed through it and the returned
+ * pointer is what gets looked up; NOTE(review): with lowerstr as wordop
+ * this rewrites the caller's buffer in place - confirm callers expect that.
+ * The lookup is a bsearch() with comparestr, so s->stop must already be in
+ * comparestr order (sortstoplist() produces that order).
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop ) 
+       key=(*(s->wordop))(key);
+   return ( s->stop && s->len>0 && bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) ) ? true : false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the configuration whose pg_ts_cfg oid is id.
+ *
+ * Two saved SPI plans (prepared on first use) are executed:
+ *  1. fetch the parser name (prs_name) of the configuration;
+ *  2. fetch (token type id, dictionary-name array) rows for it, ordered by
+ *     token id descending, so the first row carries the largest id and
+ *     determines the size of cfg->map.
+ * The parser name and dictionary names are duplicated in TopMemoryContext
+ * (presumably so they outlive SPI_finish()) and are converted to ids with
+ * name2id_prs()/name2id_dict() after the SPI session is closed.
+ * cfg->map and every dict_id array are malloc()ed; reset_cfg() frees them.
+ *
+ * All failures are reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second plan: $1 is the parser name (text), $2 stays the cfg oid */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /* rows arrive with the largest token id first: size the map from it */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull now describes column 2, the dictionary-name array */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       /* allocate first and publish the length only once the array exists */
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*ARRNELEMS(a) );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       cfg->map[lexid].len=ARRNELEMS(a);
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* store copies of the names for now; they become ids further down */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every stored dictionary name by the dictionary's id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Backend-local cache of loaded configurations.
+ *   last_cfg    - entry returned by the most recent findcfg() lookup
+ *   len         - number of initialised entries in list
+ *   reallen     - number of entries list has room for
+ *   list        - entries, kept sorted by id (qsort/bsearch with comparecfg)
+ *   name2id_map - cache of configuration name -> oid used by name2id_cfg()
+ */
+typedef struct {
+   TSCfgInfo   *last_cfg;
+   int     len;
+   int     reallen;
+   TSCfgInfo   *list;
+   SNMap       name2id_map;
+} CFGList;
+
+/* the single cache instance, initially empty */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name->id map, every cached
+ * configuration's token map with its dict_id arrays, and the list itself.
+ * CList is zeroed afterwards, so the next findcfg() starts from scratch.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               /* map has CList.list[i].len slots, one per token type id */
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() callback ordering TSCfgInfo entries by id.
+ * The ids are compared explicitly instead of being subtracted: the
+ * difference of two Oid values converted to int can have the wrong sign
+ * when the ids are far apart, which would give an inconsistent ordering.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((TSCfgInfo*)a)->id;
+   Oid idb = ((TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached TSCfgInfo for configuration id, loading it on demand.
+ *
+ * Lookup order: the entry returned last time, then a binary search of the
+ * sorted cache, and finally init_cfg() into a new slot followed by a
+ * re-sort of the list.  The list grows by doubling (starting at 16).
+ * Errors are raised through ts_error()/init_cfg().
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo *slot;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+
+   /*
+    * init_cfg() may raise an error after it has already stored id in the
+    * slot.  Keep last_cfg cleared while it runs, otherwise the next call
+    * would return that half-initialised, unregistered entry through the
+    * "last used cfg" shortcut above.
+    */
+   CList.last_cfg=NULL;
+   slot=&(CList.list[CList.len]);
+   init_cfg(id, slot);
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return findcfg(id); /* qsort changed order!! */
+}
+
+
+/*
+ * Map a configuration name to its pg_ts_cfg oid.
+ *
+ * The name->id cache CList.name2id_map is consulted first.  On a miss the
+ * saved SPI plan plan_name2id (prepared on first use) looks the name up by
+ * pg_ts_cfg.ts_name and the result is added to the cache.
+ * Raises elog(ERROR) when the plan cannot be prepared, the query fails,
+ * no row matches, or the returned oid is NULL.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   Oid id=findSNMap_t( &(CList.name2id_map), name );
+   
+   /* a non-zero id means the cache already knows this name */
+   if ( id ) 
+       return id;
+   
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull ) 
+           elog(ERROR, "Null id for tsearch config");
+   } else 
+       elog(ERROR, "No tsearch config");
+   SPI_finish();
+   addSNMap_t( &(CList.name2id_map), name, id );
+   return id;
+}
+
+
+/*
+ * Split buf (buflen bytes) into lexemes with the parser of cfg, normalise
+ * each lexeme with the dictionaries mapped to its token type, and append
+ * the resulting words to prs.
+ *
+ * For every token the dictionaries of cfg->map[type] are tried in order;
+ * the first one returning a non-NULL result wins.  Each such token advances
+ * prs->pos by one (also when the result is empty), and every returned word
+ * is stored with that position.  Tokens whose type has no map entry
+ * (type >= cfg->len) are skipped.  The word strings returned by the
+ * dictionary are kept in prs->words; only the array holding them is freed.
+ * Raises elog(ERROR) when a lexeme is MAXSTRLEN bytes or longer.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* the length is an integer argument, so pass it as int32 */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token to prs->words: a zeroed HLWORD carrying the token type,
+ * its length and a palloc()ed copy of the buflen bytes at buf.  The array
+ * is doubled until there is room for the new entry.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->lenwords <= prs->curwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* the array is final now, so the slot address stays valid */
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;
+}
+
+/*
+ * Attach the query items matching the normalised lexeme buf (buflen bytes)
+ * to the most recently added word of prs.
+ *
+ * Every VAL item of query whose operand equals buf is considered.  The
+ * first match is stored in the word's item field; each further match
+ * appends a copy of the word flagged as repeated and pointing to that item.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* make room for the worst case: one extra entry per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc() above may have moved the array,
+    * so a pointer computed before the loop could be dangling.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Parse buf (buflen bytes) for headline generation.
+ *
+ * Every token produced by the parser of cfg is appended to prs verbatim
+ * (hladdword).  Tokens whose type has a dictionary map are additionally
+ * normalised; the dictionaries of cfg->map[type] are tried in order and the
+ * first non-NULL result is used to link matching query items to the token
+ * (hlfinditem).  The normalised strings and the array holding them are
+ * freed here.
+ * Raises elog(ERROR) when a lexeme is MAXSTRLEN bytes or longer.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* the length is an integer argument, so pass it as int32 */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from the words collected in prs.
+ *
+ * Words flagged in (and neither skip nor repeated) are emitted in order:
+ * a word flagged replace is written as a single space, any other word is
+ * copied verbatim and, when flagged selected, wrapped in prs->startsel /
+ * prs->stopsel.  The word buffers are pfree()d as they are visited, except
+ * for repeated entries (hlfinditem creates those as copies sharing the
+ * original's word pointer).
+ * Returns a palloc()ed text value whose size field is set to the number of
+ * bytes written, header included.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   text *out;
+   int len=128;
+   char *ptr;
+   HLWORD  *wrd=prs->words;
+
+   out = (text*)palloc( len );
+   ptr=((char*)out) + VARHDRSZ;
+
+   while( wrd - prs->words < prs->curwords ) {
+       /* grow until the word plus both selection markers fits */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
+           int dist = ptr - ((char*)out);
+           len*= 2;
+           out = (text *) repalloc(out, len);
+           /* repalloc may move the buffer: re-derive the write position */
+           ptr=((char*)out) + dist;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace ) {
+               *ptr=' ';
+               ptr++;
+           } else {
+               if (wrd->selected) {
+                   memcpy(ptr,prs->startsel,prs->startsellen);
+                   ptr+=prs->startsellen;
+               }
+               memcpy(ptr,wrd->word,wrd->len);
+               ptr+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(ptr,prs->stopsel,prs->stopsellen);
+                   ptr+=prs->stopsellen;
+               }
+           }
+       }
+
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+
+       wrd++;
+   }
+
+   VARATT_SIZEP(out)=ptr - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the oid of the current configuration.
+ *
+ * If current_cfg_id is already set (> 0) it is returned directly.
+ * Otherwise the configuration whose pg_ts_cfg.locale equals the current
+ * LC_CTYPE locale name (setlocale(LC_CTYPE, NULL)) is looked up through a
+ * saved SPI plan, remembered in current_cfg_id and returned.
+ * Raises elog(ERROR) when the plan cannot be prepared, the query fails or
+ * no configuration matches the locale.
+ *
+ * NOTE(review): isnull is filled by SPI_getbinval but never examined, and
+ * the function is declared int although it returns an Oid - confirm both
+ * are intended.
+ */
+int  
+get_currcfg(void) {
+   Oid arg[1]={ TEXTOID };
+   const char *curlocale;
+   Datum pars[1];
+   bool isnull;
+   int stat;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* a NULL second argument only queries the current locale name */
+   curlocale = setlocale(LC_CTYPE, NULL);
+   pars[0] = PointerGetDatum( char2text((char*)curlocale) );
+   stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1);
+
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 )
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   pfree(DatumGetPointer(pars[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid (argument 0) the
+ * current one.  findcfg() is called first so the configuration is loaded
+ * (and any load error is raised) before current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+        findcfg(PG_GETARG_OID(0));
+        current_cfg_id=PG_GETARG_OID(0);
+        PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name (text
+ * argument 0) the current one.  The name is resolved with name2id_cfg()
+ * and the work is delegated to set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+   
+        DirectFunctionCall1(
+                set_curcfg,
+                ObjectIdGetDatum( name2id_cfg(name) )
+        );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();      
+}       
+
+/* SQL-callable: return the oid of the current configuration as reported
+ * by get_currcfg(). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   PG_RETURN_OID( get_currcfg() ); 
+}
+
+/*
+ * SQL-callable: report "TSearch cache cleaned" as a NOTICE.
+ * NOTE(review): no reset function (e.g. reset_cfg()) is called here
+ * directly; presumably ts_error() itself drops the caches before
+ * reporting - confirm against its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries assigned to one token type: len entries in dict_id, each a
+ * dictionary oid stored as a Datum (tried in array order by the parsers). */
+typedef struct {
+   int len;
+   Datum   *dict_id;
+} ListDictionary;
+
+/* One loaded configuration: its own oid (id), the oid of its parser
+ * (prs_id) and map, an array of len dictionary lists indexed by token
+ * type id. */
+typedef struct {
+   Oid id;
+   Oid prs_id;
+   int len;
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalised word produced by parsetext_v2(): word points to the
+ * string, len is its length and pos.pos its (limited) position in the
+ * text.  parsetext_v2() sets alen to 0; the apos/alen pair is presumably
+ * used elsewhere for a list of positions - confirm at the use site. */
+typedef struct {
+        uint16          len;
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        char       *word;
+   uint32  alen;
+}       WORD;
+   
+/* Output of parsetext_v2(): words is a growable array with lenwords
+ * allocated and curwords used entries; pos counts the lexemes recognised
+ * by a dictionary so far. */
+typedef struct {
+        WORD       *words;
+        int4            lenwords;
+        int4            curwords;
+   int4        pos;
+}       PRSTEXT;
+
+/*
+ * One token of a text being headlined.
+ *   len, word - length and buffer of the token text (not NUL-terminated)
+ *   type      - token type reported by the parser
+ *   item      - matching query item, if any (set by hlfinditem)
+ * Flags as read by genhl():
+ *   in        - token belongs to the headline
+ *   skip      - token is left out even if in is set
+ *   replace   - token is emitted as a single space
+ *   selected  - token is wrapped in the start/stop selection markers
+ *   repeated  - extra copy of the previous token for a further matching
+ *               query item; never emitted and its word is not freed
+ */
+typedef struct {
+        uint16    len;
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   uint8   type;
+        char      *word;
+   ITEM      *item;
+}       HLWORD;
+   
+/* State for headline generation: words is a growable array (lenwords
+ * allocated, curwords used); startsel/stopsel are the marker strings put
+ * around selected words by genhl(), with their lengths in startsellen and
+ * stopsellen. */
+typedef struct {
+        HLWORD       *words;
+        int4            lenwords;
+        int4            curwords;
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+/*
+ * tsstat_in - input function for the tsstat type.
+ *
+ * The argument is never examined: every call returns a freshly palloc'd,
+ * header-only tsstat (len = STATHDRSIZE, no entries).
+ */
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty=(tsstat*)palloc(STATHDRSIZE);
+   empty->size=0;
+   empty->len=STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+/*
+ * tsstat_out - output function for the tsstat type.
+ *
+ * Not implemented: always raises ERROR "Unimplemented".  The trailing
+ * PG_RETURN_NULL() follows the elog(ERROR) call and is there only to give
+ * the function a return statement.
+ */
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * SEI_realloc - create or grow an array of WordEntry pointers.
+ *
+ * in  : current array, or NULL when nothing has been allocated yet
+ * len : in/out capacity of the array, in elements
+ *
+ * With no array or a zero capacity a new 8-element array is palloc'd;
+ * otherwise the capacity is doubled and the array is repalloc'd.
+ * Returns the array to use from now on; *len holds its new capacity.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( *len!=0 && in!=NULL ) {
+       /* existing storage: double it */
+       *len = (*len) * 2;
+       return repalloc( in, (*len) * sizeof(WordEntry*) );
+   }
+
+   /* first allocation */
+   *len=8;
+   return palloc( (*len) * sizeof(WordEntry*) );
+}
+
+/*
+ * compareStatWord - order a statistics entry against a tsvector entry.
+ *
+ * a / stat : entry and the tsstat whose string area holds its lexeme
+ * b / txt  : entry and the tsvector whose string area holds its lexeme
+ *
+ * Entries of different length are ordered by length alone (shorter first);
+ * entries of equal length are ordered by strncmp() over that length.
+ * Returns <0, 0 or >0 in the usual comparator sense.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   const char *statword, *txtword;
+
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   statword = STATSTRPTR(stat) + a->pos;
+   txtword = STRPTR(txt) + b->pos;
+   return strncmp( statword, txtword, a->len );
+}
+
+/*
+ * formstat - build a new tsstat holding every entry of 'stat' plus 'len'
+ * new entries taken from the tsvector 'txt'.
+ *
+ * stat  : existing statistics (left unmodified)
+ * txt   : tsvector that the new entries point into
+ * entry : array of 'len' pointers to WordEntry items of txt that are not
+ *         yet present in stat
+ * len   : number of pointers in 'entry'
+ *
+ * The result is palloc'd.  Its string area starts with a verbatim copy of
+ * stat's string area (so the pos offsets of copied entries stay valid) and
+ * the new lexemes are appended behind it.  Each new entry gets ndoc = 1 and
+ * nentry = POSDATALEN(txt, word), or 1 when that is 0.
+ *
+ * The entry array of the result is produced by a binary-search insert when
+ * len == 1 and by a two-way merge otherwise; both rely on stat's entries
+ * and entry[] already being in compareStatWord() order.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* total length of the lexemes that have to be appended */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings first; curptr then points at the free tail of the string area */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary search for its insertion point */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       /* entries before the insertion point, the new entry, then the rest */
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries (possibly nothing) */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* whatever is left of the new words (possibly nothing) */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+/*
+ * ts_accum - fold one tsvector into accumulated word statistics.
+ *
+ * Argument 0: the statistics gathered so far (tsstat); a NULL argument
+ *             starts from an empty, header-only tsstat.
+ * Argument 1: the tsvector to account for.
+ *
+ * Words already present in the statistics are updated in place (ndoc is
+ * incremented, nentry grows by POSDATALEN or by 1 when that is 0).  Words
+ * not yet present are collected in newentry[] and added by formstat(),
+ * which returns a newly allocated tsstat.
+ *
+ * Returns the statistics to use from now on: the incoming (or freshly
+ * initialised) tsstat when the tsvector contributed no new word, otherwise
+ * the tsstat built by formstat().  In the latter case the old tsstat is
+ * not freed here.
+ *
+ * NOTE(review): argument 1 is detoasted before PG_ARGISNULL(1) is tested.
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* first call: start from an empty tsstat */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* nothing to add for a missing or empty tsvector */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two ways to match the tsvector's words against the statistics:
+    * a linear merge of both arrays, or - when the statistics hold at least
+    * 100 times as many entries as the tsvector - one binary search per word.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               /* known word: update its counters in place */
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* unknown word: remember it for formstat() */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* words left once the statistics are exhausted are all new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   /* found: leave the loop with StopLow < StopHigh */
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   /* newentry[] still points into txt, so txt is released only after formstat() */
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Iteration state of the set-returning statistics functions.
+ *
+ * cur  : index of the next StatEntry to hand out
+ * stat : private copy of the statistics being iterated.  The buffer is
+ *        filled by memcpy from a tsstat and read through the STAT* macros,
+ *        so it is declared as tsstat rather than tsvector.
+ */
+typedef struct {
+   uint32  cur;
+   tsstat  *stat;
+} StatStorage;
+
+/*
+ * ts_setup_firstcall - prepare a set-returning call over 'stat'.
+ *
+ * While funcctx->multi_call_memory_ctx is the current memory context, this
+ * allocates a StatStorage (cursor at 0) holding a byte-for-byte copy of
+ * 'stat', stores it in funcctx->user_fctx, and sets up funcctx->slot and
+ * funcctx->attinmeta from the tuple descriptor of the relation named
+ * "statinfo".  The previous memory context is restored before returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext   callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   StatStorage     *storage = palloc( sizeof(StatStorage) );
+   TupleDesc       rowdesc;
+
+   /* iteration state: cursor plus a private copy of the statistics */
+   storage->cur = 0;
+   storage->stat = palloc( stat->len );
+   memcpy(storage->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)storage;
+
+   /* everything needed to build result rows */
+   rowdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(callerctx);
+}
+
+
+/*
+ * ts_process_call - produce the next result row of a statistics SRF.
+ *
+ * Reads the StatStorage kept in funcctx->user_fctx.  While entries remain,
+ * builds a three-column row (lexeme, ndoc, nentry) from C strings, advances
+ * the cursor and returns the row as a Datum.  Once all entries have been
+ * returned, frees the stored statistics and the StatStorage itself and
+ * returns (Datum)0, which callers treat as "no more rows".
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *iter=(StatStorage*)funcctx->user_fctx;
+   StatEntry   *entry;
+   HeapTuple   tuple;
+   Datum       row;
+   char        *values[3];
+   char        ndoctxt[16];
+   char        nentrytxt[16];
+
+   if ( iter->cur >= iter->stat->size ) {
+       /* every entry has been handed out: drop the iteration state */
+       pfree(iter->stat);
+       pfree(iter);
+       return (Datum)0;
+   }
+
+   entry=STATPTR(iter->stat) + iter->cur;
+
+   /* column 0: the lexeme as a NUL-terminated C string */
+   values[0]=palloc( entry->len+1 );
+   memcpy( values[0], STATSTRPTR(iter->stat)+entry->pos, entry->len);
+   values[0][entry->len]='\0';
+
+   /* columns 1 and 2: the two counters as decimal text */
+   sprintf(ndoctxt,"%d",entry->ndoc);
+   values[1]=ndoctxt;
+   sprintf(nentrytxt,"%d",entry->nentry);
+   values[2]=nentrytxt;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   row = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[0]);
+   iter->cur++;
+   return row;
+}
+
+/*
+ * ts_accum_finish - set-returning function that expands a tsstat
+ * (argument 0) into rows.
+ *
+ * On the first call the tsstat is handed to ts_setup_firstcall(); every
+ * call then asks ts_process_call() for the next row and returns it, or
+ * finishes the set once ts_process_call() yields (Datum)0.
+ */
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Oid of the tsvector type; InvalidOid until get_ti_Oid() has filled it in. */
+static Oid tiOid=InvalidOid;
+
+/*
+ * get_ti_Oid - look up the Oid of the type named 'tsvector' through SPI
+ * and store it in tiOid.
+ *
+ * An SPI connection must already be open.  Raises ERROR when the query
+ * fails, when it returns no row, or when the Oid found is InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is an unsigned row count, so a "< 0" test can never
+    * fire; test for "no row" explicitly before touching vals[0].
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * ts_stat_sql - run a query through SPI and accumulate word statistics
+ * over its result.
+ *
+ * txt : text of the query.  Its result must consist of exactly one column
+ *       of type tsvector, otherwise ERROR is raised.
+ *
+ * The query is executed through a cursor and fetched 100 rows at a time;
+ * every non-NULL value is folded into the statistics with ts_accum().
+ * An SPI connection must already be open.  Returns the accumulated tsstat,
+ * which is header-only when the query returned no usable row.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   /* the first batch is enough to validate the shape of the result */
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum() does not free its input when it builds a new tsstat */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+/*
+ * ts_stat - set-returning function producing word statistics for the
+ * tsvector values returned by a query.
+ *
+ * Argument 0: text of the query to run.
+ *
+ * On the first call the query is run inside an SPI connection by
+ * ts_stat_sql(), and the resulting tsstat is passed to
+ * ts_setup_firstcall() before SPI_finish() is called.  Every call then
+ * returns the next row from ts_process_call(), or finishes the set once it
+ * yields (Datum)0.
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one lexeme.
+ *
+ * len    : length of the lexeme in bytes
+ * pos    : offset of the lexeme from the start of the owning tsstat's
+ *          string area (see STATSTRPTR)
+ * ndoc   : counter that ts_stat.c increments once per tsvector containing
+ *          the lexeme
+ * nentry : counter that ts_stat.c increases by the lexeme's number of
+ *          positions in each such tsvector (by 1 when it has none)
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Container for the statistics.
+ *
+ * len  : total size of the structure in bytes, header included
+ * size : number of StatEntry items
+ * data : start of the payload - 'size' StatEntry items followed by the
+ *        string area holding the lexemes (see STATPTR / STATSTRPTR)
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat: header | StatEntry[size] | string area.
+ * Arguments are parenthesised so that arbitrary expressions can be passed,
+ * and the casts use tsstat (the type declared above) rather than tsvector.
+ *
+ * STATHDRSIZE          size of the len/size header
+ * CALCSTATSIZE(x,l)    total bytes for x entries plus l bytes of strings
+ * STATPTR(x)           first StatEntry of tsstat x
+ * STATSTRPTR(x)        start of the string area of tsstat x
+ * STATSTRSIZE(x)       size in bytes of the string area of tsstat x
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance (ranking)
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary (pg_ts_dict rows are identified by dict_name)
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of lexeme values, plus an array giving each lexeme's position in that string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * Declarations of the functions of this file that are registered with
+ * PG_FUNCTION_INFO_V1: the type's input/output functions, the to_tsvector
+ * family, the tsearch2 trigger and tsvector_length.
+ */
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort comparator ordering WordEntryPos items by ascending position.
+ *
+ * Equal positions compare as 0.  Reporting "greater" for both orderings
+ * of an equal pair would make the comparator inconsistent, and qsort's
+ * behaviour is undefined for inconsistent comparators.
+ */
+static int
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and remove duplicates in place.
+ *
+ * When the same position occurs more than once the largest weight is
+ * kept.  Compaction stops early once MAXNUMPOS positions have been kept
+ * or the largest representable position (MAXENTRYPOS-1) has been stored.
+ *
+ * Returns the number of positions left at the front of a[]; an empty
+ * input yields 0.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *ptr, *res;
+
+   res=a;
+   /* nothing to sort or merge for zero or one position */
+   if (l<=1)
+       return (l==1) ? 1 : 0;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   ptr = a + 1;
+   while (ptr - a < l) {
+       if ( ptr->pos != res->pos ) {
+           /* new distinct position: move it down next to the kept ones */
+           res++;
+           res->pos = ptr->pos;
+           res->weight = ptr->weight;
+           if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
+               break;
+       } else if ( ptr->weight > res->weight )
+           res->weight = ptr->weight;  /* duplicate: keep the larger weight */
+       ptr++;
+   }
+   return res + 1 - a;
+}
+
+/* Lexeme bytes consulted by compareentry(); entry.pos indexes into it. */
+static char *BufferStr;
+
+/*
+ * qsort comparator for WordEntryIN items: shorter lexemes sort first,
+ * lexemes of equal length are ordered by strncmp over their bytes.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *lhs = (const WordEntryIN *) a;
+   const WordEntryIN *rhs = (const WordEntryIN *) b;
+
+   if ( lhs->entry.len != rhs->entry.len )
+       return ( lhs->entry.len > rhs->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[lhs->entry.pos],
+                  &BufferStr[rhs->entry.pos],
+                  lhs->entry.len);
+}
+
+/*
+ * Sort the parsed entries, merge entries carrying the same lexeme and
+ * compute how much string/position space the final tsvector needs.
+ *
+ * a         - array of l parsed entries; sorted and compacted in place
+ * l         - number of entries in a (callers pass l >= 1)
+ * buf       - buffer holding the lexeme bytes that entry.pos indexes into
+ * outbuflen - output only: bytes needed for every kept lexeme (each
+ *             SHORTALIGNed) plus, for every entry with positions, its
+ *             position list including the leading uint16 counter.  The
+ *             value passed in is ignored.
+ *
+ * Position arrays (a[i].pos) keep their count in element 0, read and
+ * written as a uint16; the positions themselves start at element 1.
+ *
+ * Returns the number of distinct entries left at the front of a.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   res = a;
+   /*
+    * Always start from zero: the caller's variable may still hold an
+    * unrelated value, and the size computed here must stand on its own.
+    */
+   *outbuflen = 0;
+   if (l == 1) {
+       *outbuflen = SHORTALIGN(a->entry.len);
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen += (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
+       }
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* different lexeme: close the current entry, start the next */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               /* +1 accounts for the counter stored in element 0 */
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* same lexeme again: fold its positions into the kept entry */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* kept entry had no positions yet: adopt the duplicate's */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+   /* account for the last kept entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/*
+ * States of the gettoken_tsvector() state machine:
+ *   WAITWORD     - before a lexeme, skipping blanks
+ *   WAITENDWORD  - inside an unquoted lexeme
+ *   WAITNEXTCHAR - a backslash was seen; the next character is taken as is
+ *   WAITENDCMPLX - inside a quoted ('...') lexeme
+ *   WAITPOSINFO  - just after a closing quote; a ':' may follow
+ *   INPOSINFO    - a position number must start here
+ *   WAITPOSDELIM - inside/after a position number: further digits, a
+ *                  weight mark, ',' or the end of the entry may follow
+ */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Double the size of state->word when fewer than two free bytes remain
+ * past state->curpos, so that one more character and a terminating '\0'
+ * always fit.  Expects a variable named "state" (TI_IN_STATE *) in scope;
+ * state->curpos is re-based onto the reallocated buffer.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Read the next lexeme (and its optional position list) from state->prsbuf.
+ *
+ * The lexeme bytes are collected in state->word (grown on demand through
+ * RESIZEPRSBUF) and NUL-terminated; state->curpos is left on the
+ * terminator, so curpos - word is the lexeme length.  When a ":pos,pos..."
+ * suffix is parsed, state->pos is a palloc'd array whose element 0 holds
+ * the number of positions (as a uint16) and state->alen is its allocated
+ * length; state->alen stays 0 when no positions were given.
+ *
+ * With state->oprisdelim set, ISOPERATOR characters end an unquoted word
+ * and a ':' or closing quote ends the lexeme without parsing positions.
+ *
+ * Returns 1 when a lexeme was read and 0 at end of input; malformed input
+ * is reported through elog(ERROR).
+ *
+ * The <ctype.h> classifiers are given (unsigned char) values: passing a
+ * negative plain char to them is undefined behaviour.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           /* character following a backslash is copied verbatim */
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* step over the closing quote before handing back */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               /* element 0 of state->pos is the counter, hence the "+1" */
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi reads the whole number; WAITPOSDELIM skips its digits */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type: parse the C string argument into
+ * the on-disk representation.
+ *
+ * Lexemes are first gathered into a scratch buffer (tmpbuf) with one
+ * WordEntryIN per token, then sorted and de-duplicated by uniqueentry(),
+ * and finally copied into a freshly allocated tsvector: the WordEntry
+ * array first, followed by each lexeme (SHORTALIGNed) and, where present,
+ * its position list.
+ *
+ * Raises ERROR for words of MAXSTRLEN bytes or more and for values whose
+ * lexeme offsets do not fit the 20-bit WordEntry.pos field.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       /* make room in the scratch buffer for this lexeme */
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /* pos is a 20-bit field: MAXSTRPOS itself is already out of range */
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* empty value: no string area at all */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /*
+        * Final offsets also count the position lists written so far, so
+        * they can exceed the scratch-buffer offsets checked above.
+        */
+       if (cur - STRPTR(in) >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* element 0 (the counter) is copied together with the positions */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * Return the number of entries (in->size) of the tsvector argument.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type.
+ *
+ * Every entry is printed as a single-quoted lexeme, entries separated by
+ * one blank; a quote inside a lexeme is preceded by a backslash.  Entries
+ * with positions get ":pos,pos,..." appended, each position followed by
+ * A, B or C for weights 3, 2 and 1 (weight 0 prints nothing).
+ *
+ * NOTE(review): a backslash inside a lexeme is printed as is, although
+ * gettoken_tsvector() treats backslash as an escape character on input.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   WordEntry  *entry = ARRPTR(out);
+   char       *outbuf,
+              *dst;
+   int4        idx,
+               lenbuf;
+
+   /*
+    * Size estimate: two quotes per entry, the separating blanks, the
+    * terminator, twice the lexeme length (escapes) and 7 bytes for every
+    * position.
+    */
+   lenbuf = out->size * 2 + out->size - 1 + 2;
+   for (idx = 0; idx < out->size; idx++) {
+       lenbuf += entry[idx].len*2;
+       if ( entry[idx].haspos )
+           lenbuf += 7*POSDATALEN(out, &(entry[idx]));
+   }
+
+   dst = outbuf = (char *) palloc(lenbuf);
+   for (idx = 0; idx < out->size; idx++, entry++)
+   {
+       char       *src = STRPTR(out)+entry->pos;
+       int4        left,
+                   npos;
+
+       if (idx > 0)
+           *dst++ = ' ';
+       *dst++ = '\'';
+       for (left = entry->len; left > 0; left--)
+       {
+           if (*src == '\'')
+           {
+               int4        used = dst - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               dst = outbuf + used;
+               *dst++ = '\\';
+           }
+           *dst++ = *src++;
+       }
+       *dst++ = '\'';
+
+       npos = POSDATALEN(out,entry);
+       if ( npos != 0 ) {
+           WordEntryPos *wptr = POSDATAPTR(out,entry);
+
+           *dst++ = ':';
+           for ( ; npos > 0; npos--, wptr++) {
+               sprintf(dst,"%d",wptr->pos);
+               dst=strchr(dst,'\0');
+               if ( wptr->weight == 3 )
+                   *dst++ = 'A';
+               else if ( wptr->weight == 2 )
+                   *dst++ = 'B';
+               else if ( wptr->weight == 1 )
+                   *dst++ = 'C';
+               /* comma between positions, none after the last one */
+               if ( npos > 1 )
+                   *dst++ = ',';
+           }
+       }
+   }
+   *dst='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort comparator for WORD items: by length first, then by the bytes of
+ * the word, then by position.
+ *
+ * Items equal in all three keys compare as 0; answering -1 for both
+ * orderings of such a pair would make the comparator inconsistent.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int         res;
+
+   if (wa->len != wb->len)
+       return (wa->len > wb->len) ? 1 : -1;
+
+   res = strncmp(wa->word, wb->word, wb->len);
+   if ( res != 0 )
+       return res;
+
+   if ( wa->pos.pos == wb->pos.pos )
+       return 0;
+   return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l parsed words and collapse repeated words into one item that
+ * carries all of their positions.
+ *
+ * On return every kept item has pos.apos pointing to a palloc'd uint16
+ * array: element 0 is the number of positions, the positions follow, and
+ * alen is the allocated length of that array.  The word strings of
+ * dropped duplicates are pfree'd.  A position equal to the one stored
+ * last for the word is not stored again, and nothing more is appended
+ * once MAXNUMPOS-1 positions are held or MAXENTRYPOS-1 has been stored.
+ *
+ * Callers pass l >= 1.  Returns the number of distinct words left at the
+ * front of a.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   if (l == 1) {
+       /* pos is a union: read the scalar position before apos overwrites it */
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* a new word starts here */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* repeated word: keep only its position */
+           tmppos=LIMITPOS(ptr->pos.pos);
+           pfree(ptr->word);
+           if ( res->pos.apos[0] < MAXNUMPOS-1 &&
+                res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 &&
+                res->pos.apos[ res->pos.apos[0] ] != tmppos ) {
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = tmppos;
+               res->pos.apos[0]++; 
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are de-duplicated with uniqueWORD(), then laid out as a
+ * WordEntry array followed by each word (SHORTALIGNed) and its position
+ * list (a uint16 count followed by the positions, all with weight 0).
+ * prs->words, the word strings and the position arrays are pfree'd.
+ *
+ * Raises ERROR when a word offset does not fit the 20-bit WordEntry.pos
+ * field.  Callers invoke this only when prs->curwords is nonzero.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /* pos is a 20-bit field: MAXSTRPOS itself is already out of range */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* position count first, then the positions themselves */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg id, text): parse the text with the configuration found
+ * by findcfg() and return the resulting tsvector; text that yields no
+ * words gives an empty tsvector.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *doc = PG_GETARG_TEXT_P(1);
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     prs;
+   tsvector   *result = NULL;
+
+   prs.pos = 0;
+   prs.curwords = 0;
+   prs.lenwords = 32;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(doc), VARSIZE(doc) - VARHDRSZ);
+   PG_FREE_IF_COPY(doc, 1);
+
+   if (prs.curwords == 0) {
+       /* nothing was collected: hand back a header-only value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   } else
+       result = makevalue(&prs);
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg name, text): translate the configuration name with
+ * name2id_cfg() and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector,
+                                Int32GetDatum( name2id_cfg( cfgname ) ),
+                                PG_GETARG_DATUM(1),
+                                (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * to_tsvector(text): delegate to to_tsvector() with the configuration id
+ * returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector,
+                                Int32GetDatum( get_currcfg() ),
+                                PG_GETARG_DATUM(0),
+                                (Datum)0);
+
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * text.  Returns the OID of the first such candidate, or InvalidOid when
+ * there is none.  The candidate list is pfree'd node by node.
+ */
+static Oid
+findFunc(char *fname) {
+   Oid funcid = InvalidOid;
+   List *names = makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(names, 1);
+
+   freeList(names);
+
+   while ( cand != NULL ) {
+       FuncCandidateList next = cand->next;
+
+       /* remember only the first match, but keep walking to free the list */
+       if ( funcid == InvalidOid && cand->args[0] == TEXTOID )
+           funcid = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return funcid;
+}
+
+/*
+ * Trigger function that rebuilds a tsvector column from text columns.
+ *
+ * Trigger arguments: tgargs[0] names the tsvector column to fill; every
+ * further argument names either a column of the relation or, when no such
+ * column exists, a function looked up through findFunc().  Once a function
+ * has been found it is applied to the value of every column processed
+ * after it.  Columns that are not text, varchar or bpchar are skipped with
+ * a WARNING; NULL values are skipped silently.
+ *
+ * Must be fired as a row-level BEFORE trigger on INSERT or UPDATE; any
+ * other use raises ERROR.  Returns the tuple with the tsvector column
+ * replaced, parsed with the configuration given by get_currcfg().
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* pass the column value through the user-supplied function */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * If detoasting made no copy, remember the pointer so that the
+            * comparison after parsing does not pfree it.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free only a detoasted copy, never the value txt_toasted refers to */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a header-only (empty) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Directory entry for one lexeme of a tsvector, stored as bit-fields of a
+ * uint32:
+ *   haspos - set when a position list follows the lexeme bytes
+ *   len    - length of the lexeme in bytes
+ *   pos    - offset of the lexeme from the start of the string area
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/*
+ * Limits derived from the field widths above.  The largest values the
+ * fields can actually hold are MAXSTRLEN-1 and MAXSTRPOS-1.
+ */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One position of a lexeme, stored as bit-fields of a uint16:
+ *   weight - 2-bit weight (printed as A, B, C for 3, 2, 1 by tsvector_out)
+ *   pos    - 14-bit position number
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+/* positions are clamped to MAXENTRYPOS-1, the largest 14-bit value */
+#define MAXENTRYPOS    (1<<14)
+/* cap on the number of positions kept per lexeme */
+#define MAXNUMPOS  256
+/* clamp x to the largest storable position; note that x is evaluated twice */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * The tsvector value itself:
+ *   len  - total size of the value in bytes
+ *   size - number of WordEntry items
+ *   data - the WordEntry array, followed by the string area holding the
+ *          lexemes and their position lists
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Layout helpers for tsvector values.  All macro arguments are
+ * parenthesised so that arbitrary expressions can be passed safely.
+ *
+ * DATAHDRSIZE        - size of the len/size header
+ * CALCDATASIZE(x,l)  - total size for x entries and l bytes of string area
+ * ARRPTR(x)          - the WordEntry array of tsvector x
+ * STRPTR(x)          - start of the string area (right after the array)
+ * STRSIZE(x)         - size of the string area
+ * _POSDATAPTR(x,e)   - address of the uint16 position count of entry e
+ * POSDATALEN(x,e)    - number of positions of entry e, 0 without haspos
+ * POSDATAPTR(x,e)    - first WordEntryPos of entry e
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Entry used while parsing tsvector input: the final WordEntry plus a
+ * pointer to its position array.  In tsvector.c, element 0 of that array
+ * holds the number of positions (as a uint16) and the positions follow.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * State of the tsvector text parser (see gettoken_tsvector):
+ *   prsbuf     - current read position in the input string
+ *   word       - buffer receiving the current lexeme
+ *   curpos     - write position inside word
+ *   len        - allocated size of word
+ *   state      - current state of the parser's state machine
+ *   alen       - allocated length of pos; 0 when the lexeme has no positions
+ *   pos        - positions parsed for the current lexeme
+ *   oprisdelim - when true, operator characters delimit words and no
+ *                position info is parsed
+ */
+typedef struct
+{
+   char       *prsbuf;
+   char       *word;
+   char       *curpos;
+   int4        len;
+   int4        state;
+   int4        alen;
+   WordEntryPos    *pos;
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+/* reads the next lexeme; returns 1 when one was read, 0 at end of input */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * Declarations of the functions of this file that are registered with
+ * PG_FUNCTION_INFO_V1.
+ */
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the argument without any position
+ * information.  Entries keep their order; every entry of the result has
+ * haspos = 0 and its lexeme is stored SHORTALIGNed in a compacted string
+ * area.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* string area needed once the position lists are gone */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char"): return a copy of the tsvector in which
+ * every position carries the given weight.
+ *
+ * The weight letter is case-insensitive: a = 3, b = 2, c = 1, d = 0; any
+ * other character raises ERROR.  Entries without positions are copied
+ * unchanged.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* tolower() must be given an unsigned char value, never a negative char */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;    
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two entries that may live in different tsvectors: ptra and ptrb
+ * are the string areas that a->pos and b->pos index into.  Shorter
+ * lexemes sort first; equal-length lexemes are ordered by strncmp.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position list
+ * of destptr (an entry of dest), adding maxpos to each position and
+ * clamping the result with LIMITPOS.
+ *
+ * If destptr has no position list yet, its count is initialised to 0
+ * first.  Copying stops when the source is exhausted, when the list holds
+ * MAXNUMPOS positions, or when the last stored position is already the
+ * largest representable one (MAXENTRYPOS-1).  destptr->haspos is set when
+ * at least one position was added.
+ *
+ * The caller must have reserved room for the list behind the lexeme in
+ * dest.  Returns the number of positions added.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+
+/*
+ * Concatenate two tsvectors.
+ *
+ * The entries of both arguments are merged in compareEntry() order (the
+ * merge presumes each input is already sorted that way).  Positions taken
+ * from the second argument are shifted by the largest position found in
+ * the first one, through add_pos().  A lexeme present in both arguments
+ * yields one entry holding the first argument's positions followed by the
+ * shifted positions of the second.
+ *
+ * The result is built in a zeroed buffer of in1->len + in2->len bytes, an
+ * upper bound, and its size, len and string area are fixed up at the end.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   /* provisional: STRPTR(out) depends on size, so the string area starts */
+   /* after room for in1->size + in2->size entries until the final fix-up */
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* in1 positions are copied verbatim, count included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* the count starts at 0 here because out was zeroed above */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: emit one merged entry */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count is already in place, so only positions are added */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* leftover entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* leftover entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Fix up the header with the real entry count; if merged entries made
+    * the array shorter, slide the string area down behind it.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- WARNING: uninstall script for tsearch2.
+-- It drops all indices, triggers and columns that use the types defined
+-- in tsearch2.sql. Everything below runs inside one transaction
+-- (BEGIN ... END).
+
+
+-- GiST operator class for tsvector, together with anything depending on it.
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- Operators defined on tsvector / tsquery.
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+
+-- Configuration tables (dictionaries, parsers, configurations, mappings).
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- Data types; CASCADE also removes the objects that depend on each type.
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- Remaining functions, dropped explicitly by signature.
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of each token type, indexed by the token type
+ * id from deflex.h (LATWORD == 1 ... HTMLENTITY == 23). Entry 0 is an empty
+ * placeholder because token ids start at 1. Must stay in the same order as
+ * tok_alias[] and the #defines in deflex.h.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of each token type, indexed by the token type id from
+ * deflex.h. Entry 0 is an empty placeholder because token ids start at 1.
+ * Must stay in the same order as lex_descr[] and the #defines in deflex.h.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Remember: LASTNUM must equal the highest token type id defined below.
+ * When a token type is added, update LASTNUM and append matching entries
+ * to lex_descr[] and tok_alias[].
+ */
+#define LASTNUM        23
+
+/* Token type ids; each value is also the index into lex_descr[]/tok_alias[]. */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* Per-token-type description and alias tables (declared here, indexed by the ids above). */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Interface to the flex-generated word parser.
+ *
+ * NOTE(review): `token` and `tokenlen` are declared without `extern`, so
+ * every file including this header gets its own (tentative) definition.
+ * This only links when the toolchain merges common symbols; consider
+ * `extern` here plus a single definition in the scanner source.
+ * NOTE(review): FILE is used below but <stdio.h> is not included here;
+ * includers must pull it in first.
+ */
+
+/* text of the token most recently returned by tsearch2_yylex() */
+char      *token;
+/* length of that token in bytes */
+int            tokenlen;
+/* scan the next token; returns its token type id */
+int            tsearch2_yylex(void);
+/* start scanning an in-memory string of the given length */
+void       start_parse_str(char *, int);
+/* start scanning from a file handle; the int is a byte limit (0 means no limit) */
+void       start_parse_fh(FILE *, int);
+/* release the scan buffer and parser state */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: every three-argument fprintf() call
+ * in the generated scanner is redirected to ts_error(ERROR, ...).
+ * NOTE(review): presumably ts_error(ERROR, ...) does not return - confirm.
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* Route the scanner's memory management through the postgres allocator. */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* text of the token being returned to the caller */
+char *s     = NULL;  /* private copy used to return a WHOLE compound token (hyphenated word, URL) */
+
+YY_BUFFER_STATE buf = NULL; /* current scan buffer; needed for parsing from a string */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the file handle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next fread() request */
+
+/*
+ * Redefine the input macro so that reads from a file handle stop after
+ * lrlimit bytes.
+ * NOTE(review): in the non-interactive branch the fread() statement is
+ * indented as if it belonged to the `else`, but it is a separate statement
+ * and runs for both the limited and the unlimited case.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* exclusive state entered after a whole hyphenated word was returned; its parts are then returned one by one */
+%x DELIM  
+/* exclusive states entered while parsing a URL (after a scheme prefix / after a host followed by a path) */
+%x URL  
+%x SERVER  
+
+/* exclusive states used while skipping markup: inside a tag, inside a quoted string in a tag, inside a comment, inside a script element */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* character classes: every byte 0x80-0xFF counts as a non-latin letter (written with koi8 cyrillic in mind) */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: free the saved copy of a compound token (if one
+ * is held) and destroy the current scan buffer.
+ */
+void end_parse() {
+   if (s != NULL) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer(buf);
+   buf = NULL;
+}
+
+/*
+ * Begin scanning the first `limit` bytes of the in-memory string `str`.
+ * Any parse still in progress is finished first.
+ */
+void start_parse_str(char* str, int limit) {
+   if (buf != NULL)
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes(str, limit);
+   tsearch2_yy_switch_to_buffer(buf);
+   BEGIN INITIAL;
+}
+
+/*
+ * Begin scanning from the file handle `fh`. A non-zero `limit` caps the
+ * number of bytes read; zero means read without limit (lrlimit = -1).
+ * Any parse still in progress is finished first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if (buf != NULL)
+       end_parse();
+
+   if (limit)
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer(fh, YY_BUF_SIZE);
+   tsearch2_yy_switch_to_buffer(buf);
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+/* saved SPI plan used by init_prs() to fetch a parser row by oid; prepared on first use */
+static void *plan_getparser=NULL;
+/* oid of the current parser; InvalidOid until set_curprs() or one of the *_current functions assigns it */
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * init_prs - load the description of parser `id` into *prs.
+ *
+ * Zeroes *prs, then reads the row with the given oid from pg_ts_parser via
+ * SPI and fills in the fmgr lookup info for the start, next-token, end and
+ * headline functions (allocated in TopMemoryContext) plus the oid of the
+ * lextype function. Reports an error through ts_error() when the plan
+ * cannot be prepared, the query fails, or no such parser exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   /* the plan is prepared once and kept for the rest of the session */
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+
+       /* columns 1..5 follow the select list above */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       /* lextype is kept as a plain oid; it is invoked through OidFunctionCall1() */
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned, so print it with %u rather than %d */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/*
+ * Cache of parsers that were already looked up.
+ */
+typedef struct {
+   /* entry returned by the most recent findprs() call, or NULL */
+   WParserInfo *last_prs;
+   /* number of entries in use in list[] */
+   int     len;
+   /* number of entries allocated in list[] */
+   int     reallen;
+   /* entries kept sorted by prs_id (findprs() re-sorts after each insert) */
+   WParserInfo *list;
+   /* parser name -> oid map filled by name2id_prs() */
+   SNMap       name2id_map;
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * reset_prs - drop the whole parser cache: the name->oid map, the list of
+ * loaded parsers, and all bookkeeping fields.
+ */
+void
+reset_prs(void) {
+   freeSNMap(&PList.name2id_map);
+
+   if ( PList.list != NULL )
+       free(PList.list);
+
+   memset(&PList, 0, sizeof(PrsList));
+}
+
+/*
+ * compareprs - qsort()/bsearch() comparator ordering WParserInfo entries by
+ * prs_id. Returns a negative, zero or positive value.
+ *
+ * The ids are compared explicitly instead of being subtracted: prs_id is an
+ * unsigned Oid, and the difference of two unsigned values converted to int
+ * has the wrong sign when the ids are more than INT_MAX apart.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   if ( ida > idb )
+       return 1;
+   return 0;
+}
+
+/*
+ * findprs - return the cached description of parser `id`, loading it with
+ * init_prs() on first use.
+ *
+ * Lookup order: the most recently returned entry, then a binary search of
+ * the sorted cache, then a fresh load. The cache array is grown with
+ * realloc() and re-sorted after every insert.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo *slot;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       WParserInfo key;
+       key.prs_id=id;
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* last chance: load the parser, growing the cache first if it is full */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /*
+    * Fill the free slot through a local pointer and publish it only after
+    * init_prs() has succeeded, so that an error raised while loading does
+    * not leave last_prs pointing at a slot outside the valid range.
+    */
+   PList.last_prs=NULL;
+   slot=&(PList.list[PList.len]);
+   init_prs(id, slot);
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return findprs(id); /* qsort changed order!! */
+}
+
+/* saved SPI plan used by name2id_prs(); prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * name2id_prs - translate a parser name into its oid in pg_ts_parser.
+ *
+ * The name->oid map in PList is consulted first; on a miss the table is
+ * queried through SPI and the result is added to the map. Reports an error
+ * through ts_error() when no parser with that name exists.
+ */
+Oid
+name2id_prs(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   Oid id=findSNMap_t( &(PList.name2id_map), name );
+   
+   /* a non-zero id means the name was found in the map */
+   if ( id ) 
+       return id;
+   
+
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 )
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   SPI_finish();
+   /* remember the answer for later calls */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* per-query state of the token_type* set-returning functions */
+typedef struct {
+   /* index of the next list[] entry to return */
+   int     cur;
+   /* array returned by the parser's lextype function; an entry with lexid == 0 ends it */
+   LexDescr    *list;
+} TypeStorage;
+
+/*
+ * setup_firstcall - first-call initialisation shared by the token_type*
+ * functions: looks up parser `prsid`, calls its lextype function to get the
+ * list of token types, and stores that list together with the tuple
+ * descriptor data for relation "tokentype" in funcctx.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   TypeStorage     *st;
+   WParserInfo *prs = findprs(prsid); 
+
+   /* everything allocated below is made in the multi-call memory context */
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->cur=0;
+   st->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
+   );
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * process_call - produce the next row for the token_type* functions.
+ *
+ * Returns a tuple datum (id, alias, description) for the current list entry
+ * and advances the cursor; the alias and description strings are freed once
+ * the tuple is built. When the list is missing or its terminating entry
+ * (lexid == 0) is reached, the state is freed and (Datum)0 is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *state = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char        *fields[3];
+   char         idbuf[16];
+   HeapTuple    tup;
+   Datum        row;
+
+   /* nothing (more) to return: release the state */
+   if ( !state->list || !state->list[state->cur].lexid ) {
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   sprintf(idbuf, "%d", entry->lexid);
+   fields[0] = idbuf;
+   fields[1] = entry->alias;
+   fields[2] = entry->descr;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   row = TupleGetDatum(funcctx->slot, tup);
+
+   pfree(fields[1]);
+   pfree(fields[2]);
+   state->cur++;
+   return row;
+}
+
+/*
+ * token_type - set-returning function listing the token types of the parser
+ * whose oid is argument 0. The first call builds the state with
+ * setup_firstcall(); every call returns the next row from process_call()
+ * until it yields (Datum)0.
+ */
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) { 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * token_type_byname - same as token_type(), but argument 0 is the parser
+ * name (text); it is translated to an oid with name2id_prs().
+ */
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * token_type_current - same as token_type(), but for the current parser.
+ * If no current parser has been set yet, the parser named "default" is
+ * looked up and becomes the current one.
+ */
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * set_curprs - make the parser whose oid is argument 0 the current parser.
+ * The parser is looked up with findprs() first, so the current id is only
+ * changed after that lookup has returned.
+ */
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid prsid = PG_GETARG_OID(0);
+
+   findprs(prsid);
+   current_parser_id = prsid;
+   PG_RETURN_VOID();
+}
+
+/*
+ * set_curprs_byname - make the parser named by argument 0 (text) the
+ * current parser; resolves the name and delegates to set_curprs().
+ */
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+   text *name = PG_GETARG_TEXT_P(0);
+   Oid prsid = name2id_prs(name);
+
+   DirectFunctionCall1(set_curprs, ObjectIdGetDatum(prsid));
+
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* one parsed token: its token type id and a NUL-terminated copy of its text */
+typedef struct {
+   int type;
+   char    *lexem;
+} LexemEntry;
+
+/* per-query state of the parse* set-returning functions */
+typedef struct {
+   /* index of the next list[] entry to return */
+   int cur;
+   /* allocated size of list[] while parsing; number of tokens once parsing is done */
+   int len;
+   LexemEntry  *list;
+} PrsStorage;
+   
+
+/*
+ * prs_setup_firstcall - first-call initialisation shared by the parse*
+ * functions.
+ *
+ * Runs parser `prsid` over the whole of `txt` (start, repeated next-token
+ * calls until the parser returns type 0, then end), copying every token
+ * into a PrsStorage list allocated in the multi-call memory context. The
+ * list and the tuple descriptor data for relation "tokenout" are stored in
+ * funcctx for prs_process_call().
+ *
+ * prsid is an Oid, matching setup_firstcall() and findprs(); every caller
+ * passes an Oid.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text payload (varlena header excluded) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* a token type of 0 marks the end of input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* double the list when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* lex is not NUL-terminated: copy llen bytes and terminate the copy */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the token count and cur the read cursor */
+   st->len=st->cur;
+   st->cur=0;
+
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * prs_process_call - produce the next row for the parse* functions.
+ *
+ * Returns a tuple datum (token type id, token text) for the current token
+ * and advances the cursor; the token text is freed once the tuple is built.
+ * When all tokens have been returned, the state is freed and (Datum)0 is
+ * returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *state = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char        *fields[2];
+   char         typebuf[16];
+   HeapTuple    tup;
+   Datum        row;
+
+   /* all tokens consumed: release the state */
+   if ( !( state->cur < state->len ) ) {
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   sprintf(typebuf, "%d", entry->type);
+   fields[0] = typebuf;
+   fields[1] = entry->lexem;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   row = TupleGetDatum(funcctx->slot, tup);
+
+   pfree(fields[1]);
+   state->cur++;
+   return row;
+}
+
+           
+
+/*
+ * parse - set-returning function that tokenises the text in argument 1 with
+ * the parser whose oid is argument 0. The whole text is parsed on the first
+ * call by prs_setup_firstcall(); each call then returns one token row from
+ * prs_process_call() until it yields (Datum)0.
+ */
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * parse_byname - same as parse(), but argument 0 is the parser name (text);
+ * it is translated to an oid with name2id_prs(). Argument 1 is the text.
+ */
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * parse_current - same as parse(), but uses the current parser and takes
+ * the text as argument 0. If no current parser has been set yet, the parser
+ * named "default" is looked up and becomes the current one.
+ */
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * headline - build a headline text for a document and a query.
+ *
+ * Arguments: 0 = configuration oid, 1 = document text, 2 = query,
+ * 3 = optional options text (absent or a NULL pointer means no options).
+ * The document is parsed with hlparsetext(), the headline function of the
+ * configuration's parser is called on the parsed words, and genhl() builds
+ * the returned text.
+ */
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   /* the *_byname / *_current wrappers pass a NULL pointer when no options were given */
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /* NOTE(review): assumes the parser's headline function always sets startsel/stopsel; they are zeroed above and freed unconditionally here - confirm */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+/*
+ * headline_byname - same as headline(), but argument 0 is the configuration
+ * name (text); it is translated to an oid with name2id_cfg(). When called
+ * without an options argument, a NULL pointer is passed on in its place.
+ */
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg=PG_GETARG_TEXT_P(0);
+
+   Datum out=DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);   
+}
+
+/*
+ * headline_current - same as headline(), but uses the configuration
+ * returned by get_currcfg(). Arguments: 0 = document text, 1 = query,
+ * 2 = optional options text (a NULL pointer is passed on when absent).
+ */
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   PG_RETURN_DATUM(DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
+   ));
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* description of one parser, loaded from pg_ts_parser by init_prs() */
+typedef struct {
+   /* oid of the parser's row in pg_ts_parser */
+   Oid prs_id;
+   /* fmgr lookup info for the parser's start, next-token, end and headline functions */
+   FmgrInfo start_info;
+   FmgrInfo getlexeme_info;
+   FmgrInfo end_info;
+   FmgrInfo headline_info;
+   /* oid of the function returning the parser's token type list */
+   Oid lextype;
+   /* value returned by the parser's start function; handed back to its other functions */
+   void *prs;
+} WParserInfo;
+
+/* fill *prs with the description of parser `id` */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached description of parser `id`, loading it on first use */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* drop all cached parser information */
+void   reset_prs(void);
+
+
+/* one token type as reported by a parser's lextype function; an array of these ends with lexid == 0 */
+typedef struct {
+   int lexid;
+   char    *alias;
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * Return the token-type table of the default parser.
+ *
+ * The result is a palloc'ed array of LASTNUM+1 LexDescr entries.  Entry k
+ * (0-based) describes token id k+1, with alias and description copied from
+ * tok_alias[] and lex_descr[].  The final entry only has lexid set to 0 and
+ * serves as the end marker.
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *table=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   LexDescr *entry=table;
+   int tokid;
+
+   for(tokid=1;tokid<=LASTNUM;tokid++,entry++) {
+       entry->lexid = tokid;
+       entry->alias = pstrdup(tok_alias[tokid]);
+       entry->descr = pstrdup(lex_descr[tokid]);
+   }
+
+   /* entry now points at table[LASTNUM], the terminator slot */
+   entry->lexid=0;
+
+   PG_RETURN_POINTER(table);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+
+/*
+ * Start-of-parse entry point of the default parser: hands the buffer
+ * (argument 0) and its int32 length (argument 1) to start_parse_str().
+ * Always returns a NULL pointer.
+ */
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char *buf=(char*)PG_GETARG_POINTER(0);
+   int buflen=PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+
+/*
+ * Fetch the next token from the default parser.
+ *
+ * Argument 0 is not read here.  Arguments 1 and 2 are output pointers:
+ * after tsearch2_yylex() has run they receive the current values of
+ * token and tokenlen (names defined outside this function).  The value
+ * returned by tsearch2_yylex() is returned as an int32.
+ */
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tokptr=(char**)PG_GETARG_POINTER(1);
+   int *toklen=(int*)PG_GETARG_POINTER(2);
+   int lextype;
+
+   /* the lexer must run first: it is what updates token/tokenlen */
+   lextype=tsearch2_yylex();
+
+   *tokptr = token;
+   *toklen = tokenlen;
+   PG_RETURN_INT32(lextype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+/*
+ * End-of-parse entry point of the default parser: calls end_parse() and
+ * returns void.  Argument 0 is not read by this function.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   end_parse();
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class predicates used by the headline code below.  The numbers are
+ * token ids; the aliases quoted here are the ones listed for these ids by
+ * token_type('default') in the regression output (12 blank, 5 url,
+ * 15 hword, 16 lhword, 17 nlhword, 13 tag, 14 http, 23 entity, 7 sfloat,
+ * 8 version, 20 float, 21 int, 22 uint).
+ * Each macro evaluates its argument more than once, so pass only
+ * side-effect-free expressions.
+ * LEAVETOKEN, COMPLEXTOKEN and ENDPUNCTOKEN are not referenced elsewhere
+ * in this file.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+/* tag, http, blank, entity */
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* url, tag and the hyphenated-word tokens: marked "replace" in a headline */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* tokens that are not counted as words when measuring headline length */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* tokens a headline should not end on: non-words, numbers, version, ignorables */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* Window of headline words handed to checkcondition_HL through TS_execute */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * TS_execute callback: reports whether query item val is attached to any
+ * of the len words in the hlCheck window passed as checkval.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window=(hlCheck*)checkval;
+   int idx=0;
+
+   while( idx<window->len ) {
+       if ( window->words[idx].item==val )
+           return true;
+       idx++;
+   }
+   return false;
+}
+
+
+/*
+ * Find the next "cover": a span [*p, *q] of prs->words that satisfies the
+ * query.
+ *
+ * On entry *p is the word index to start searching from; on success *p and
+ * *q hold the first and last word index of the cover and true is returned.
+ *
+ * Pass 1: for every VAL item of the query take its first occurrence at or
+ * after the start position; *q becomes the largest of those indexes.
+ * Pass 2: for every VAL item scan backwards from *q down to the start
+ * position and take the first hit; *p becomes the smallest of those
+ * indexes.
+ * The window is then checked with TS_execute(); if it does not satisfy the
+ * query the search is retried one word further on.
+ *
+ * Note: 0 doubles as the "nothing found" value of *q, so a match located
+ * only at word index 0 is reported as no cover.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* pass 1: right edge */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q==0 )
+       return false;
+
+   /* pass 2: left edge */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* window does not satisfy the query: retry one word later */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+
+/*
+ * Headline function of the default parser.
+ *
+ * Arguments: 0 - HLPRSTEXT with the parsed document words, 1 - options
+ * text (may be NULL), 2 - the query.  Recognised options (matched
+ * case-insensitively) are MaxWords, MinWords, ShortWord, StartSel and
+ * StopSel; defaults are 35, 15, 3, "<b>" and "</b>".  Errors are raised
+ * unless 0 < MinWords < MaxWords and ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is measured, stretched or shrunk
+ * towards the MinWords..MaxWords range, and the best one is remembered.
+ * If no cover exists, the headline is the start of the document up to
+ * MinWords words.  The words of the chosen range are flagged (in,
+ * selected, skip, replace) in prs->words, and prs->startsel/stopsel with
+ * their lengths are filled in.  Returns prs.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults; opt may override these and the start/stop tags */
+   int min_words=15;
+   int max_words=35;
+   int shortword=3;
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* a better cover with a good end was already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           /*
+            * NOTE(review): i is one past the last word counted above, so
+            * the first iteration looks at a word that was never counted
+            * (and at words[q+1] when the count stopped exactly at q) --
+            * confirm whether i should be clamped to q here.
+            */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this cover if it is the first one, if it holds more distinct
+        * query words and ends on a good token, or if it ends on a good
+        * token while the current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: fall back to the beginning of the document */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* default highlight tags when the options did not supply them */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+>{D-weight, A-weight, B-weight, C-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(
+  [ K int4, ]
+  vector tsvector, query tsquery,
+  [ normalization int4 ]
+  ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
+ “Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(

+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]

+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or less.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+
+>Relevance Ranking for One to Three Term Queries”
+ in the 1999 Information Processing and Management.
+ The value K is one of the values from their formula,
+ and defaults to K=4.
+ The examples in their paper use K=16;
+ we can roughly describe the term
+ as stating how far apart two search terms can fall
+ before the formula begins penalizing them for lack of proximity.
+
+
+Headlines
+
+
+
+ CREATE FUNCTION headline(
+  [ id int4, | ts_name text, ]
+  document text, query tsquery,
+  [ options text ]
+  ) RETURNS text
+
+ Every form of the headline() function
+ accepts a document along with a query,
+ and returns one or more ellipsis-separated excerpts from the document
+ in which terms from the query are highlighted.
+ The configuration with which to parse the document
+ can be specified by either its id or ts_name;
+ if none is specified then the current configuration is used instead.
+ 
+ An options string if provided should be a comma-separated list
+ of one or more ‘option=value’ pairs.
+ The available options are:
+ 
+  StartSel, StopSel —
+   the strings with which query words appearing in the document
+   should be delimited to distinguish them from other excerpted words.
+  MaxWords, MinWords —
+   limits on the longest and shortest headlines you will accept.
+  ShortWord —
+   this prevents your headline from beginning or ending
+   with a word which has this many characters or less.
+   The default value of 3 should eliminate most English
+   conjunctions and articles.
+ 
+ Any unspecified options receive these defaults:
+ 
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+
+
+
diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out

new file mode 100644 (file)

index 0000000..a842c5b
--- /dev/null
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -0,0 +1,2055 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE:  ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE:  Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE:  ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE:  Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE:  ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE:  Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector 
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector 
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector 
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector 
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+  tsvector   
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+     tsvector     
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+       ?column?        
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+          ?column?          
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery 
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery 
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery 
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery 
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+  tsquery  
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+  tsquery   
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+  tsquery   
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+   tsquery   
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+     tsquery      
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+     tsquery     
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+     tsquery     
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+  tsquery   
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+   tsquery   
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+  tsquery  
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+  tsquery   
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+    tsquery     
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+     tsquery     
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+       tsquery       
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+     tsquery      
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+     tsquery      
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+     tsquery      
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+       tsquery        
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+                 tsquery                 
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+           tsquery           
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+             tsquery             
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+             tsquery              
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+                 tsquery                  
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+                 ?column?                 
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+     lexize      
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+          lexize          
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid |    alias     |               descr               
+-------+--------------+-----------------------------------
+     1 | lword        | Latin word
+     2 | nlword       | Non-latin word
+     3 | word         | Word
+     4 | email        | Email
+     5 | url          | URL
+     6 | host         | Host
+     7 | sfloat       | Scientific notation
+     8 | version      | VERSION
+     9 | part_hword   | Part of hyphenated word
+    10 | nlpart_hword | Non-latin part of hyphenated word
+    11 | lpart_hword  | Latin part of hyphenated word
+    12 | blank        | Space symbols
+    13 | tag          | HTML Tag
+    14 | http         | HTTP head
+    15 | hword        | Hyphenated word
+    16 | lhword       | Latin hyphenated word
+    17 | nlhword      | Non-latin hyphenated word
+    18 | uri          | URI
+    19 | file         | File or path name
+    20 | float        | Decimal notation
+    21 | int          | Signed integer
+    22 | uint         | Unsigned integer
+    23 | entity       | HTML Entity
+(23 rows)
+
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+ tokid |                token                 
+-------+--------------------------------------
+    22 | 345
+    12 |  
+     4 | [email protected]
+    12 |  
+    12 | '
+    12 |  
+    14 | http://
+     6 | www.com
+    12 | /
+    12 |  
+    14 | http://
+     5 | aew.werc.ewr/?ad=qwe&dw
+     6 | aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     5 | 1aew.werc.ewr/?ad=qwe&dw
+     6 | 1aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+     6 | 2aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 3aew.werc.ewr/?ad=qwe&dw
+     6 | 3aew.werc.ewr
+    18 | /?ad=qwe&dw
+    12 |  
+    14 | http://
+     6 | 4aew.werc.ewr
+    12 |  
+    14 | http://
+     5 | 5aew.werc.ewr:8100/?
+     6 | 5aew.werc.ewr
+    18 | :8100/?
+    12 |   
+     1 | ad
+    12 | =
+     1 | qwe
+    12 | &
+     1 | dw
+    12 |  
+     5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+     6 | 6aew.werc.ewr
+    18 | :8100/?ad=qwe&dw
+    12 |  
+     5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+     6 | 7aew.werc.ewr
+    18 | :8100/?ad=qwe&dw=%20%32
+    12 |  
+     7 | +4.0e-10
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwe
+    12 |  
+     1 | qwqwe
+    12 |  
+    20 | 234.435
+    12 |  
+    22 | 455
+    12 |  
+    20 | 5.005
+    12 |  
+     4 | [email protected]
+    12 |  
+    16 | qwe-wer
+    11 | qwe
+    12 | -
+    11 | wer
+    12 |  
+     1 | asdf
+    12 |  
+    13 |  
+     1 | qwer
+    12 |  
+     1 | jf
+    12 |  
+     1 | sdjk
+    13 |  
+    12 |  
+     3 | ewr1
+    12 | >
+    12 |  
+     3 | ewri2
+    12 |  
+    13 |  
+    12 | 
+
+    19 | /usr/local/fff
+    12 |  
+    19 | /awdf/dwqe/4325
+    12 |  
+    19 | rewt/ewr
+    12 |  
+     1 | wefjn
+    12 |  
+    19 | /wqe-324/ewr
+    12 |  
+     6 | gist.h
+    12 |  
+     6 | gist.h.c
+    12 |  
+     6 | gist.c
+    12 | .
+    12 |  
+     1 | readline
+    12 |  
+    20 | 4.2
+    12 |  
+    20 | 4.2
+    12 | .
+    12 |  
+    20 | 4.2
+    12 | ,
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 |  
+    15 | readline-4
+    11 | readline
+    12 | -
+    20 | 4.2
+    12 | .
+    12 |  
+    22 | 234
+    12 |  
+
+    13 |  
+    12 |  
+     1 | wow
+    12 |   
+    12 | <
+    12 |  
+     1 | jqw
+    12 |  
+    12 | <
+    12 | >
+    12 |  
+     1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+                                                                                                                                                                                                                                                                                                                                                                                                                                               to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length 
+--------
+      2
+(1 row)
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+ length 
+--------
+     53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies '); 
+  to_tsquery   
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies '); 
+   to_tsquery    
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+       to_tsquery       
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg 
+------------
+ 
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count 
+-------
+     1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count 
+-------
+     1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank 
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+   rank   
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+   rank    
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+   word    | ndoc | nentry 
+-----------+------+--------
+ qq        |  109 |    109
+ qt        |  102 |    102
+ qe        |  100 |    100
+ qh        |   98 |     98
+ qw        |   98 |     98
+ qa        |   97 |     97
+ ql        |   94 |     94
+ qs        |   94 |     94
+ qi        |   92 |     92
+ qr        |   92 |     92
+ qj        |   91 |     91
+ qd        |   87 |     87
+ qz        |   87 |     87
+ qc        |   86 |     86
+ qn        |   86 |     86
+ qv        |   85 |     85
+ qo        |   84 |     84
+ qy        |   84 |     84
+ wp        |   84 |     84
+ qf        |   81 |     81
+ qk        |   80 |     80
+ wt        |   80 |     80
+ qu        |   79 |     79
+ qg        |   78 |     78
+ wb        |   78 |     78
+ qx        |   77 |     77
+ wr        |   77 |     77
+ ws        |   73 |     73
+ wy        |   73 |     73
+ wa        |   72 |     72
+ wf        |   70 |     70
+ wg        |   70 |     70
+ wi        |   70 |     70
+ wu        |   70 |     70
+ wc        |   69 |     69
+ wj        |   69 |     69
+ qp        |   68 |     68
+ wh        |   68 |     68
+ wv        |   68 |     68
+ qb        |   66 |     66
+ eu        |   65 |     65
+ we        |   65 |     65
+ wl        |   65 |     65
+ wq        |   65 |     65
+ wk        |   64 |     64
+ ee        |   63 |     63
+ eo        |   63 |     63
+ qm        |   63 |     63
+ wn        |   63 |     63
+ ef        |   62 |     62
+ eh        |   62 |     62
+ ex        |   62 |     62
+ re        |   62 |     62
+ rl        |   62 |     62
+ rr        |   62 |     62
+ eb        |   61 |     61
+ ek        |   61 |     61
+ ww        |   61 |     61
+ ea        |   60 |     60
+ ei        |   60 |     60
+ em        |   60 |     60
+ eq        |   60 |     60
+ ew        |   60 |     60
+ ro        |   60 |     60
+ rw        |   60 |     60
+ tl        |   60 |     60
+ eg        |   59 |     59
+ en        |   59 |     59
+ ez        |   59 |     59
+ rj        |   59 |     59
+ ry        |   59 |     59
+ tw        |   59 |     59
+ tx        |   59 |     59
+ ej        |   58 |     58
+ es        |   58 |     58
+ ra        |   58 |     58
+ rd        |   58 |     58
+ rg        |   58 |     58
+ rx        |   58 |     58
+ tb        |   58 |     58
+ wd        |   58 |     58
+ ed        |   57 |     57
+ tc        |   57 |     57
+ wx        |   57 |     57
+ er        |   56 |     56
+ wm        |   56 |     56
+ wo        |   56 |     56
+ yw        |   56 |     56
+ ep        |   55 |     55
+ rk        |   55 |     55
+ rp        |   55 |     55
+ rz        |   55 |     55
+ ta        |   55 |     55
+ rq        |   54 |     54
+ yn        |   54 |     54
+ ec        |   53 |     53
+ el        |   53 |     53
+ ru        |   53 |     53
+ rv        |   53 |     53
+ tz        |   53 |     53
+ un        |   53 |     53
+ wz        |   53 |     53
+ ys        |   53 |     53
+ oe        |   52 |     52
+ tn        |   52 |     52
+ tq        |   52 |     52
+ ty        |   52 |     52
+ uq        |   52 |     52
+ yg        |   52 |     52
+ ym        |   52 |     52
+ oi        |   51 |     51
+ to        |   51 |     51
+ yi        |   51 |     51
+ pn        |   50 |     50
+ rb        |   50 |     50
+ ri        |   50 |     50
+ rn        |   50 |     50
+ ti        |   50 |     50
+ tv        |   50 |     50
+ um        |   50 |     50
+ ut        |   50 |     50
+ ya        |   50 |     50
+ et        |   49 |     49
+ ix        |   49 |     49
+ ox        |   49 |     49
+ q3        |   49 |     49
+ yf        |   49 |     49
+ yl        |   49 |     49
+ yo        |   49 |     49
+ yr        |   49 |     49
+ ev        |   48 |     48
+ ey        |   48 |     48
+ ot        |   48 |     48
+ rc        |   48 |     48
+ rm        |   48 |     48
+ th        |   48 |     48
+ uo        |   48 |     48
+ ia        |   47 |     47
+ q1        |   47 |     47
+ rh        |   47 |     47
+ yq        |   47 |     47
+ yz        |   47 |     47
+ av        |   46 |     46
+ im        |   46 |     46
+ os        |   46 |     46
+ tk        |   46 |     46
+ yy        |   46 |     46
+ ir        |   45 |     45
+ iv        |   45 |     45
+ iw        |   45 |     45
+ oj        |   45 |     45
+ pl        |   45 |     45
+ pv        |   45 |     45
+ te        |   45 |     45
+ tu        |   45 |     45
+ uv        |   45 |     45
+ ux        |   45 |     45
+ yd        |   45 |     45
+ yx        |   45 |     45
+ ij        |   44 |     44
+ pa        |   44 |     44
+ se        |   44 |     44
+ tg        |   44 |     44
+ ue        |   44 |     44
+ yb        |   44 |     44
+ yt        |   44 |     44
+ if        |   43 |     43
+ ik        |   43 |     43
+ in        |   43 |     43
+ ph        |   43 |     43
+ pj        |   43 |     43
+ q5        |   43 |     43
+ rt        |   43 |     43
+ ub        |   43 |     43
+ ud        |   43 |     43
+ uh        |   43 |     43
+ uj        |   43 |     43
+ w7        |   43 |     43
+ ye        |   43 |     43
+ yv        |   43 |     43
+ db        |   42 |     42
+ do        |   42 |     42
+ id        |   42 |     42
+ ie        |   42 |     42
+ ii        |   42 |     42
+ of        |   42 |     42
+ pr        |   42 |     42
+ q4        |   42 |     42
+ rf        |   42 |     42
+ td        |   42 |     42
+ uk        |   42 |     42
+ up        |   42 |     42
+ yh        |   42 |     42
+ yk        |   42 |     42
+ io        |   41 |     41
+ it        |   41 |     41
+ pb        |   41 |     41
+ q0        |   41 |     41
+ q7        |   41 |     41
+ rs        |   41 |     41
+ tj        |   41 |     41
+ ur        |   41 |     41
+ ig        |   40 |     40
+ iu        |   40 |     40
+ iy        |   40 |     40
+ od        |   40 |     40
+ q6        |   40 |     40
+ tt        |   40 |     40
+ ug        |   40 |     40
+ ul        |   40 |     40
+ us        |   40 |     40
+ uu        |   40 |     40
+ uz        |   40 |     40
+ ah        |   39 |     39
+ ar        |   39 |     39
+ as        |   39 |     39
+ dl        |   39 |     39
+ dt        |   39 |     39
+ hk        |   39 |     39
+ iq        |   39 |     39
+ is        |   39 |     39
+ oc        |   39 |     39
+ ov        |   39 |     39
+ oy        |   39 |     39
+ uf        |   39 |     39
+ ui        |   39 |     39
+ aa        |   38 |     38
+ ad        |   38 |     38
+ fh        |   38 |     38
+ gm        |   38 |     38
+ ic        |   38 |     38
+ jd        |   38 |     38
+ om        |   38 |     38
+ or        |   38 |     38
+ oz        |   38 |     38
+ pm        |   38 |     38
+ q8        |   38 |     38
+ sf        |   38 |     38
+ sm        |   38 |     38
+ sv        |   38 |     38
+ uc        |   38 |     38
+ ak        |   37 |     37
+ aq        |   37 |     37
+ di        |   37 |     37
+ e4        |   37 |     37
+ fi        |   37 |     37
+ fx        |   37 |     37
+ ha        |   37 |     37
+ hp        |   37 |     37
+ ih        |   37 |     37
+ og        |   37 |     37
+ po        |   37 |     37
+ pw        |   37 |     37
+ sn        |   37 |     37
+ su        |   37 |     37
+ sw        |   37 |     37
+ w6        |   37 |     37
+ yj        |   37 |     37
+ yu        |   37 |     37
+ ag        |   36 |     36
+ am        |   36 |     36
+ at        |   36 |     36
+ e1        |   36 |     36
+ ff        |   36 |     36
+ gx        |   36 |     36
+ he        |   36 |     36
+ hj        |   36 |     36
+ ib        |   36 |     36
+ iz        |   36 |     36
+ lm        |   36 |     36
+ ok        |   36 |     36
+ pk        |   36 |     36
+ pp        |   36 |     36
+ pu        |   36 |     36
+ sp        |   36 |     36
+ tf        |   36 |     36
+ tm        |   36 |     36
+ ay        |   35 |     35
+ dy        |   35 |     35
+ fu        |   35 |     35
+ ku        |   35 |     35
+ lh        |   35 |     35
+ lq        |   35 |     35
+ o6        |   35 |     35
+ ob        |   35 |     35
+ on        |   35 |     35
+ op        |   35 |     35
+ pd        |   35 |     35
+ ps        |   35 |     35
+ si        |   35 |     35
+ sl        |   35 |     35
+ sx        |   35 |     35
+ tp        |   35 |     35
+ tr        |   35 |     35
+ w3        |   35 |     35
+ y1        |   35 |     35
+ al        |   34 |     34
+ ap        |   34 |     34
+ az        |   34 |     34
+ dc        |   34 |     34
+ dd        |   34 |     34
+ dz        |   34 |     34
+ e0        |   34 |     34
+ fj        |   34 |     34
+ fp        |   34 |     34
+ gd        |   34 |     34
+ gg        |   34 |     34
+ gk        |   34 |     34
+ go        |   34 |     34
+ ho        |   34 |     34
+ jc        |   34 |     34
+ oa        |   34 |     34
+ oh        |   34 |     34
+ oo        |   34 |     34
+ pe        |   34 |     34
+ px        |   34 |     34
+ sd        |   34 |     34
+ sq        |   34 |     34
+ sy        |   34 |     34
+ ab        |   33 |     33
+ ae        |   33 |     33
+ af        |   33 |     33
+ aw        |   33 |     33
+ e5        |   33 |     33
+ fk        |   33 |     33
+ gu        |   33 |     33
+ gy        |   33 |     33
+ hb        |   33 |     33
+ hm        |   33 |     33
+ hy        |   33 |     33
+ jl        |   33 |     33
+ jr        |   33 |     33
+ ls        |   33 |     33
+ oq        |   33 |     33
+ pt        |   33 |     33
+ sa        |   33 |     33
+ sh        |   33 |     33
+ sj        |   33 |     33
+ so        |   33 |     33
+ sz        |   33 |     33
+ t7        |   33 |     33
+ uw        |   33 |     33
+ w8        |   33 |     33
+ y0        |   33 |     33
+ yp        |   33 |     33
+ dh        |   32 |     32
+ dp        |   32 |     32
+ dq        |   32 |     32
+ e7        |   32 |     32
+ fn        |   32 |     32
+ fo        |   32 |     32
+ fr        |   32 |     32
+ ga        |   32 |     32
+ gq        |   32 |     32
+ hh        |   32 |     32
+ il        |   32 |     32
+ ip        |   32 |     32
+ jv        |   32 |     32
+ lc        |   32 |     32
+ ol        |   32 |     32
+ pc        |   32 |     32
+ q9        |   32 |     32
+ ds        |   31 |     31
+ e9        |   31 |     31
+ fd        |   31 |     31
+ fe        |   31 |     31
+ ft        |   31 |     31
+ gs        |   31 |     31
+ hl        |   31 |     31
+ hs        |   31 |     31
+ jb        |   31 |     31
+ kc        |   31 |     31
+ kw        |   31 |     31
+ mj        |   31 |     31
+ q2        |   31 |     31
+ r3        |   31 |     31
+ sb        |   31 |     31
+ sk        |   31 |     31
+ ts        |   31 |     31
+ ua        |   31 |     31
+ yc        |   31 |     31
+ zw        |   31 |     31
+ ao        |   30 |     30
+ du        |   30 |     30
+ fw        |   30 |     30
+ gj        |   30 |     30
+ hu        |   30 |     30
+ kh        |   30 |     30
+ kl        |   30 |     30
+ kv        |   30 |     30
+ ld        |   30 |     30
+ lf        |   30 |     30
+ pq        |   30 |     30
+ py        |   30 |     30
+ sc        |   30 |     30
+ sr        |   30 |     30
+ uy        |   30 |     30
+ vg        |   30 |     30
+ w2        |   30 |     30
+ xg        |   30 |     30
+ xo        |   30 |     30
+ au        |   29 |     29
+ cx        |   29 |     29
+ fv        |   29 |     29
+ gh        |   29 |     29
+ gl        |   29 |     29
+ gt        |   29 |     29
+ hw        |   29 |     29
+ ji        |   29 |     29
+ km        |   29 |     29
+ la        |   29 |     29
+ ou        |   29 |     29
+ r0        |   29 |     29
+ w0        |   29 |     29
+ y9        |   29 |     29
+ zm        |   29 |     29
+ zs        |   29 |     29
+ zy        |   29 |     29
+ ax        |   28 |     28
+ cd        |   28 |     28
+ dj        |   28 |     28
+ dn        |   28 |     28
+ dr        |   28 |     28
+ ht        |   28 |     28
+ jf        |   28 |     28
+ lo        |   28 |     28
+ lr        |   28 |     28
+ na        |   28 |     28
+ ng        |   28 |     28
+ r8        |   28 |     28
+ ss        |   28 |     28
+ xt        |   28 |     28
+ y6        |   28 |     28
+ aj        |   27 |     27
+ ca        |   27 |     27
+ cg        |   27 |     27
+ df        |   27 |     27
+ dg        |   27 |     27
+ dv        |   27 |     27
+ gc        |   27 |     27
+ gn        |   27 |     27
+ gr        |   27 |     27
+ hd        |   27 |     27
+ i8        |   27 |     27
+ jn        |   27 |     27
+ jt        |   27 |     27
+ lp        |   27 |     27
+ o9        |   27 |     27
+ ow        |   27 |     27
+ r9        |   27 |     27
+ t8        |   27 |     27
+ u5        |   27 |     27
+ w4        |   27 |     27
+ xm        |   27 |     27
+ zz        |   27 |     27
+ a2        |   26 |     26
+ ac        |   26 |     26
+ ai        |   26 |     26
+ cm        |   26 |     26
+ cu        |   26 |     26
+ cw        |   26 |     26
+ dk        |   26 |     26
+ e2        |   26 |     26
+ fc        |   26 |     26
+ fg        |   26 |     26
+ fl        |   26 |     26
+ fs        |   26 |     26
+ ge        |   26 |     26
+ gv        |   26 |     26
+ hc        |   26 |     26
+ hi        |   26 |     26
+ hx        |   26 |     26
+ jj        |   26 |     26
+ jm        |   26 |     26
+ kg        |   26 |     26
+ kk        |   26 |     26
+ kn        |   26 |     26
+ ko        |   26 |     26
+ kt        |   26 |     26
+ ln        |   26 |     26
+ mx        |   26 |     26
+ pg        |   26 |     26
+ r4        |   26 |     26
+ t6        |   26 |     26
+ u1        |   26 |     26
+ u4        |   26 |     26
+ vi        |   26 |     26
+ vr        |   26 |     26
+ w1        |   26 |     26
+ w9        |   26 |     26
+ xk        |   26 |     26
+ xs        |   26 |     26
+ zf        |   26 |     26
+ bb        |   25 |     25
+ dm        |   25 |     25
+ dw        |   25 |     25
+ e8        |   25 |     25
+ fb        |   25 |     25
+ gw        |   25 |     25
+ h8        |   25 |     25
+ hf        |   25 |     25
+ hg        |   25 |     25
+ hn        |   25 |     25
+ hv        |   25 |     25
+ i0        |   25 |     25
+ i3        |   25 |     25
+ jg        |   25 |     25
+ jo        |   25 |     25
+ jx        |   25 |     25
+ kq        |   25 |     25
+ lw        |   25 |     25
+ lx        |   25 |     25
+ o3        |   25 |     25
+ p7        |   25 |     25
+ pf        |   25 |     25
+ pi        |   25 |     25
+ pz        |   25 |     25
+ r2        |   25 |     25
+ r5        |   25 |     25
+ t9        |   25 |     25
+ u7        |   25 |     25
+ ve        |   25 |     25
+ vu        |   25 |     25
+ y5        |   25 |     25
+ y8        |   25 |     25
+ zt        |   25 |     25
+ an        |   24 |     24
+ bj        |   24 |     24
+ dx        |   24 |     24
+ fm        |   24 |     24
+ fz        |   24 |     24
+ gb        |   24 |     24
+ gi        |   24 |     24
+ gp        |   24 |     24
+ hr        |   24 |     24
+ hz        |   24 |     24
+ i5        |   24 |     24
+ jq        |   24 |     24
+ kb        |   24 |     24
+ ke        |   24 |     24
+ kf        |   24 |     24
+ kp        |   24 |     24
+ lv        |   24 |     24
+ lz        |   24 |     24
+ o8        |   24 |     24
+ r1        |   24 |     24
+ s7        |   24 |     24
+ sg        |   24 |     24
+ u3        |   24 |     24
+ vj        |   24 |     24
+ vt        |   24 |     24
+ w5        |   24 |     24
+ zj        |   24 |     24
+ be        |   23 |     23
+ bi        |   23 |     23
+ bn        |   23 |     23
+ cn        |   23 |     23
+ cy        |   23 |     23
+ da        |   23 |     23
+ e6        |   23 |     23
+ fa        |   23 |     23
+ js        |   23 |     23
+ ki        |   23 |     23
+ kz        |   23 |     23
+ li        |   23 |     23
+ mt        |   23 |     23
+ mz        |   23 |     23
+ nu        |   23 |     23
+ o2        |   23 |     23
+ p5        |   23 |     23
+ p8        |   23 |     23
+ r7        |   23 |     23
+ t0        |   23 |     23
+ t1        |   23 |     23
+ t3        |   23 |     23
+ vm        |   23 |     23
+ xh        |   23 |     23
+ xx        |   23 |     23
+ zp        |   23 |     23
+ zr        |   23 |     23
+ a3        |   22 |     22
+ bg        |   22 |     22
+ de        |   22 |     22
+ e3        |   22 |     22
+ fq        |   22 |     22
+ i2        |   22 |     22
+ i7        |   22 |     22
+ ja        |   22 |     22
+ jk        |   22 |     22
+ jy        |   22 |     22
+ kr        |   22 |     22
+ kx        |   22 |     22
+ ly        |   22 |     22
+ nb        |   22 |     22
+ nh        |   22 |     22
+ ns        |   22 |     22
+ s3        |   22 |     22
+ u2        |   22 |     22
+ vn        |   22 |     22
+ xe        |   22 |     22
+ y4        |   22 |     22
+ zh        |   22 |     22
+ zo        |   22 |     22
+ zq        |   22 |     22
+ a1        |   21 |     21
+ bl        |   21 |     21
+ bo        |   21 |     21
+ cb        |   21 |     21
+ ch        |   21 |     21
+ co        |   21 |     21
+ cq        |   21 |     21
+ cv        |   21 |     21
+ d7        |   21 |     21
+ g8        |   21 |     21
+ je        |   21 |     21
+ jp        |   21 |     21
+ jz        |   21 |     21
+ lg        |   21 |     21
+ me        |   21 |     21
+ nc        |   21 |     21
+ p4        |   21 |     21
+ st        |   21 |     21
+ vb        |   21 |     21
+ vw        |   21 |     21
+ vz        |   21 |     21
+ xj        |   21 |     21
+ xq        |   21 |     21
+ xu        |   21 |     21
+ xy        |   21 |     21
+ zb        |   21 |     21
+ bv        |   20 |     20
+ bz        |   20 |     20
+ cj        |   20 |     20
+ cp        |   20 |     20
+ cs        |   20 |     20
+ d8        |   20 |     20
+ ju        |   20 |     20
+ k0        |   20 |     20
+ ks        |   20 |     20
+ ky        |   20 |     20
+ l1        |   20 |     20
+ lb        |   20 |     20
+ lj        |   20 |     20
+ lu        |   20 |     20
+ nm        |   20 |     20
+ nw        |   20 |     20
+ nz        |   20 |     20
+ o7        |   20 |     20
+ p6        |   20 |     20
+ vh        |   20 |     20
+ vp        |   20 |     20
+ vs        |   20 |     20
+ xb        |   20 |     20
+ xr        |   20 |     20
+ z3        |   20 |     20
+ zv        |   20 |     20
+ bq        |   19 |     19
+ br        |   19 |     19
+ by        |   19 |     19
+ cl        |   19 |     19
+ d2        |   19 |     19
+ f1        |   19 |     19
+ f4        |   19 |     19
+ gf        |   19 |     19
+ hq        |   19 |     19
+ k9        |   19 |     19
+ ka        |   19 |     19
+ kd        |   19 |     19
+ kj        |   19 |     19
+ md        |   19 |     19
+ mi        |   19 |     19
+ ml        |   19 |     19
+ my        |   19 |     19
+ nj        |   19 |     19
+ ny        |   19 |     19
+ o1        |   19 |     19
+ s4        |   19 |     19
+ s8        |   19 |     19
+ t5        |   19 |     19
+ u0        |   19 |     19
+ xl        |   19 |     19
+ zg        |   19 |     19
+ zi        |   19 |     19
+ a5        |   18 |     18
+ b9        |   18 |     18
+ bh        |   18 |     18
+ bx        |   18 |     18
+ d3        |   18 |     18
+ fy        |   18 |     18
+ g2        |   18 |     18
+ i4        |   18 |     18
+ i6        |   18 |     18
+ i9        |   18 |     18
+ jw        |   18 |     18
+ lk        |   18 |     18
+ mb        |   18 |     18
+ mv        |   18 |     18
+ nd        |   18 |     18
+ nr        |   18 |     18
+ nt        |   18 |     18
+ t2        |   18 |     18
+ xf        |   18 |     18
+ xv        |   18 |     18
+ zc        |   18 |     18
+ zd        |   18 |     18
+ a7        |   17 |     17
+ bc        |   17 |     17
+ bd        |   17 |     17
+ ce        |   17 |     17
+ cf        |   17 |     17
+ cr        |   17 |     17
+ g9        |   17 |     17
+ j0        |   17 |     17
+ j5        |   17 |     17
+ mp        |   17 |     17
+ mr        |   17 |     17
+ mw        |   17 |     17
+ nk        |   17 |     17
+ no        |   17 |     17
+ o0        |   17 |     17
+ o4        |   17 |     17
+ s0        |   17 |     17
+ s1        |   17 |     17
+ t4        |   17 |     17
+ u9        |   17 |     17
+ vf        |   17 |     17
+ vx        |   17 |     17
+ x3        |   17 |     17
+ xi        |   17 |     17
+ xn        |   17 |     17
+ xz        |   17 |     17
+ zl        |   17 |     17
+ zn        |   17 |     17
+ a0        |   16 |     16
+ bu        |   16 |     16
+ bw        |   16 |     16
+ ci        |   16 |     16
+ ck        |   16 |     16
+ d0        |   16 |     16
+ d4        |   16 |     16
+ d6        |   16 |     16
+ f5        |   16 |     16
+ g1        |   16 |     16
+ gz        |   16 |     16
+ h4        |   16 |     16
+ jh        |   16 |     16
+ l4        |   16 |     16
+ lt        |   16 |     16
+ mg        |   16 |     16
+ mh        |   16 |     16
+ mo        |   16 |     16
+ ni        |   16 |     16
+ nl        |   16 |     16
+ nq        |   16 |     16
+ p2        |   16 |     16
+ u8        |   16 |     16
+ v9        |   16 |     16
+ vl        |   16 |     16
+ vo        |   16 |     16
+ xp        |   16 |     16
+ y3        |   16 |     16
+ y7        |   16 |     16
+ z7        |   16 |     16
+ za        |   16 |     16
+ zx        |   16 |     16
+ bf        |   15 |     15
+ bp        |   15 |     15
+ cc        |   15 |     15
+ g0        |   15 |     15
+ j2        |   15 |     15
+ j9        |   15 |     15
+ l6        |   15 |     15
+ le        |   15 |     15
+ ll        |   15 |     15
+ m8        |   15 |     15
+ ma        |   15 |     15
+ mu        |   15 |     15
+ nf        |   15 |     15
+ r6        |   15 |     15
+ s5        |   15 |     15
+ vd        |   15 |     15
+ vk        |   15 |     15
+ xa        |   15 |     15
+ xw        |   15 |     15
+ y2        |   15 |     15
+ z8        |   15 |     15
+ ze        |   15 |     15
+ zu        |   15 |     15
+ a6        |   14 |     14
+ bk        |   14 |     14
+ bt        |   14 |     14
+ c0        |   14 |     14
+ f8        |   14 |     14
+ g3        |   14 |     14
+ g4        |   14 |     14
+ g7        |   14 |     14
+ h6        |   14 |     14
+ h7        |   14 |     14
+ h9        |   14 |     14
+ i1        |   14 |     14
+ k1        |   14 |     14
+ k2        |   14 |     14
+ k6        |   14 |     14
+ k7        |   14 |     14
+ mc        |   14 |     14
+ nn        |   14 |     14
+ p9        |   14 |     14
+ u6        |   14 |     14
+ xd        |   14 |     14
+ z6        |   14 |     14
+ zk        |   14 |     14
+ a4        |   13 |     13
+ a9        |   13 |     13
+ bm        |   13 |     13
+ cz        |   13 |     13
+ f2        |   13 |     13
+ f3        |   13 |     13
+ f6        |   13 |     13
+ g6        |   13 |     13
+ h2        |   13 |     13
+ j1        |   13 |     13
+ k5        |   13 |     13
+ m1        |   13 |     13
+ mf        |   13 |     13
+ mq        |   13 |     13
+ np        |   13 |     13
+ nx        |   13 |     13
+ o5        |   13 |     13
+ p0        |   13 |     13
+ p1        |   13 |     13
+ s6        |   13 |     13
+ s9        |   13 |     13
+ v6        |   13 |     13
+ va        |   13 |     13
+ vc        |   13 |     13
+ xc        |   13 |     13
+ z0        |   13 |     13
+ c9        |   12 |     12
+ d1        |   12 |     12
+ h0        |   12 |     12
+ h1        |   12 |     12
+ j8        |   12 |     12
+ k4        |   12 |     12
+ l5        |   12 |     12
+ l9        |   12 |     12
+ m2        |   12 |     12
+ m6        |   12 |     12
+ m9        |   12 |     12
+ n7        |   12 |     12
+ nv        |   12 |     12
+ p3        |   12 |     12
+ vq        |   12 |     12
+ vy        |   12 |     12
+ x1        |   12 |     12
+ x2        |   12 |     12
+ z5        |   12 |     12
+ c1        |   11 |     11
+ c3        |   11 |     11
+ ct        |   11 |     11
+ f9        |   11 |     11
+ g5        |   11 |     11
+ j6        |   11 |     11
+ l8        |   11 |     11
+ n1        |   11 |     11
+ v7        |   11 |     11
+ vv        |   11 |     11
+ x5        |   11 |     11
+ x8        |   11 |     11
+ z2        |   11 |     11
+ b0        |   10 |     10
+ b2        |   10 |     10
+ b8        |   10 |     10
+ c6        |   10 |     10
+ f0        |   10 |     10
+ f7        |   10 |     10
+ h5        |   10 |     10
+ j3        |   10 |     10
+ j4        |   10 |     10
+ j7        |   10 |     10
+ l7        |   10 |     10
+ m0        |   10 |     10
+ m7        |   10 |     10
+ mm        |   10 |     10
+ mn        |   10 |     10
+ n8        |   10 |     10
+ v1        |   10 |     10
+ x0        |   10 |     10
+ x6        |   10 |     10
+ x7        |   10 |     10
+ x9        |   10 |     10
+ a8        |    9 |      9
+ b1        |    9 |      9
+ b4        |    9 |      9
+ b5        |    9 |      9
+ b6        |    9 |      9
+ ba        |    9 |      9
+ bs        |    9 |      9
+ c5        |    9 |      9
+ d5        |    9 |      9
+ k8        |    9 |      9
+ l0        |    9 |      9
+ m5        |    9 |      9
+ mk        |    9 |      9
+ ms        |    9 |      9
+ n3        |    9 |      9
+ n4        |    9 |      9
+ n6        |    9 |      9
+ ne        |    9 |      9
+ v0        |    9 |      9
+ v3        |    9 |      9
+ v5        |    9 |      9
+ v8        |    9 |      9
+ b3        |    8 |      8
+ b7        |    8 |      8
+ c2        |    8 |      8
+ c7        |    8 |      8
+ c8        |    8 |      8
+ d9        |    8 |      8
+ k3        |    8 |      8
+ l3        |    8 |      8
+ m3        |    8 |      8
+ m4        |    8 |      8
+ n0        |    8 |      8
+ n5        |    8 |      8
+ v4        |    8 |      8
+ x4        |    8 |      8
+ z1        |    8 |      8
+ z9        |    8 |      8
+ l2        |    7 |      7
+ s2        |    7 |      7
+ z4        |    7 |      7
+ 1l        |    6 |      6
+ 1o        |    6 |      6
+ 1t        |    6 |      6
+ 2e        |    6 |      6
+ 2o        |    6 |      6
+ c4        |    6 |      6
+ h3        |    6 |      6
+ n2        |    6 |      6
+ n9        |    6 |      6
+ v2        |    6 |      6
+ 2l        |    5 |      5
+ 2u        |    5 |      5
+ 3k        |    5 |      5
+ 4p        |    5 |      5
+ 18        |    4 |      4
+ 1a        |    4 |      4
+ 1i        |    4 |      4
+ 2s        |    4 |      4
+ 3q        |    4 |      4
+ 3y        |    4 |      4
+ 5y        |    4 |      4
+ 1f        |    3 |      3
+ 1h        |    3 |      3
+ 1m        |    3 |      3
+ 1p        |    3 |      3
+ 1s        |    3 |      3
+ 1v        |    3 |      3
+ 1x        |    3 |      3
+ 27        |    3 |      3
+ 2a        |    3 |      3
+ 2b        |    3 |      3
+ 2h        |    3 |      3
+ 2n        |    3 |      3
+ 2p        |    3 |      3
+ 2v        |    3 |      3
+ 2y        |    3 |      3
+ 3d        |    3 |      3
+ 3w        |    3 |      3
+ 3z        |    3 |      3
+ 4a        |    3 |      3
+ 4d        |    3 |      3
+ 4v        |    3 |      3
+ 4z        |    3 |      3
+ 5e        |    3 |      3
+ 5i        |    3 |      3
+ 5k        |    3 |      3
+ 5o        |    3 |      3
+ 5t        |    3 |      3
+ 6b        |    3 |      3
+ 6d        |    3 |      3
+ 6o        |    3 |      3
+ 6w        |    3 |      3
+ 7a        |    3 |      3
+ 7h        |    3 |      3
+ 7r        |    3 |      3
+ 93        |    3 |      3
+ 10        |    2 |      2
+ 12        |    2 |      2
+ 15        |    2 |      2
+ 16        |    2 |      2
+ 19        |    2 |      2
+ 1b        |    2 |      2
+ 1d        |    2 |      2
+ 1g        |    2 |      2
+ 1j        |    2 |      2
+ 1n        |    2 |      2
+ 1r        |    2 |      2
+ 1u        |    2 |      2
+ 1w        |    2 |      2
+ 1y        |    2 |      2
+ 20        |    2 |      2
+ 25        |    2 |      2
+ 2d        |    2 |      2
+ 2i        |    2 |      2
+ 2j        |    2 |      2
+ 2k        |    2 |      2
+ 2q        |    2 |      2
+ 2r        |    2 |      2
+ 2t        |    2 |      2
+ 2w        |    2 |      2
+ 2z        |    2 |      2
+ 3b        |    2 |      2
+ 3f        |    2 |      2
+ 3h        |    2 |      2
+ 3o        |    2 |      2
+ 3p        |    2 |      2
+ 3r        |    2 |      2
+ 3s        |    2 |      2
+ 3v        |    2 |      2
+ 42        |    2 |      2
+ 43        |    2 |      2
+ 4f        |    2 |      2
+ 4g        |    2 |      2
+ 4h        |    2 |      2
+ 4j        |    2 |      2
+ 4m        |    2 |      2
+ 4r        |    2 |      2
+ 4s        |    2 |      2
+ 4t        |    2 |      2
+ 4u        |    2 |      2
+ 5c        |    2 |      2
+ 5f        |    2 |      2
+ 5h        |    2 |      2
+ 5p        |    2 |      2
+ 5q        |    2 |      2
+ 5z        |    2 |      2
+ 6a        |    2 |      2
+ 6h        |    2 |      2
+ 6q        |    2 |      2
+ 6r        |    2 |      2
+ 6t        |    2 |      2
+ 6y        |    2 |      2
+ 70        |    2 |      2
+ 7c        |    2 |      2
+ 7g        |    2 |      2
+ 7k        |    2 |      2
+ 7o        |    2 |      2
+ 7u        |    2 |      2
+ 8j        |    2 |      2
+ 8w        |    2 |      2
+ 9f        |    2 |      2
+ 9y        |    2 |      2
+ copyright |    2 |      2
+ foo       |    1 |      3
+ bar       |    1 |      2
+ 0e        |    1 |      1
+ 0h        |    1 |      1
+ 0p        |    1 |      1
+ 0w        |    1 |      1
+ 0z        |    1 |      1
+ 11        |    1 |      1
+ 13        |    1 |      1
+ 14        |    1 |      1
+ 17        |    1 |      1
+ 1k        |    1 |      1
+ 1q        |    1 |      1
+ 1z        |    1 |      1
+ 24        |    1 |      1
+ 26        |    1 |      1
+ 28        |    1 |      1
+ 2f        |    1 |      1
+ 30        |    1 |      1
+ 345       |    1 |      1
+ 37        |    1 |      1
+ 39        |    1 |      1
+ 3a        |    1 |      1
+ 3e        |    1 |      1
+ 3g        |    1 |      1
+ 3i        |    1 |      1
+ 3m        |    1 |      1
+ 3t        |    1 |      1
+ 3u        |    1 |      1
+ 40        |    1 |      1
+ 41        |    1 |      1
+ 44        |    1 |      1
+ 45        |    1 |      1
+ 48        |    1 |      1
+ 4b        |    1 |      1
+ 4c        |    1 |      1
+ 4i        |    1 |      1
+ 4k        |    1 |      1
+ 4n        |    1 |      1
+ 4o        |    1 |      1
+ 4q        |    1 |      1
+ 4w        |    1 |      1
+ 4y        |    1 |      1
+ 51        |    1 |      1
+ 55        |    1 |      1
+ 56        |    1 |      1
+ 5a        |    1 |      1
+ 5d        |    1 |      1
+ 5g        |    1 |      1
+ 5j        |    1 |      1
+ 5l        |    1 |      1
+ 5s        |    1 |      1
+ 5u        |    1 |      1
+ 5x        |    1 |      1
+ 64        |    1 |      1
+ 68        |    1 |      1
+ 6c        |    1 |      1
+ 6f        |    1 |      1
+ 6g        |    1 |      1
+ 6i        |    1 |      1
+ 6k        |    1 |      1
+ 6n        |    1 |      1
+ 6p        |    1 |      1
+ 6s        |    1 |      1
+ 6u        |    1 |      1
+ 6x        |    1 |      1
+ 72        |    1 |      1
+ 7f        |    1 |      1
+ 7j        |    1 |      1
+ 7n        |    1 |      1
+ 7p        |    1 |      1
+ 7w        |    1 |      1
+ 7y        |    1 |      1
+ 7z        |    1 |      1
+ 80        |    1 |      1
+ 82        |    1 |      1
+ 85        |    1 |      1
+ 8d        |    1 |      1
+ 8i        |    1 |      1
+ 8l        |    1 |      1
+ 8n        |    1 |      1
+ 8p        |    1 |      1
+ 8t        |    1 |      1
+ 8x        |    1 |      1
+ 95        |    1 |      1
+ 97        |    1 |      1
+ 9a        |    1 |      1
+ 9e        |    1 |      1
+ 9h        |    1 |      1
+ 9r        |    1 |      1
+ 9w        |    1 |      1
+ qwerti    |    1 |      1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE:  TSearch cache cleaned
+ reset_tsearch 
+---------------
+ 
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+   to_tsquery   
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd 
+---------
+     1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd  
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd 
+---------
+       2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+                                                                                                get_covers                                                                                                
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+                                                                                             get_covers                                                                                             
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+                                                       headline                                                        
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+                                           headline                                           
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+                                         headline                                          
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN

new file mode 100644 (file)

index 0000000..c13e496
--- /dev/null
+++ b/contrib/tsearch2/gendict/Makefile.IN
@@ -0,0 +1,12 @@
+# Makefile template for a generated tsearch2 dictionary module.
+# config.sh pipes this file through sed: it fills in the placeholders
+# below and rewrites the whole (initially empty) PG_CPPFLAGS line, then
+# stores the result as the Makefile of the new contrib directory.
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+PG_CPPFLAGS =
+# Link against the static library built in the sibling tsearch2 directory.
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+
+include $(top_srcdir)/contrib/contrib-global.mk
diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict

new file mode 100644 (file)

index 0000000..e91f1b7
--- /dev/null
+++ b/contrib/tsearch2/gendict/README.gendict
@@ -0,0 +1,130 @@
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility helps you create dictionaries for the contrib/tsearch v2
+module. In particular, it has built-in support for Snowball stemmers.
+
+The programming API for tsearch2 dictionaries is described in the tsearch v2
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+    run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -v - be verbose
+    -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+    -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -s - generate Snowball wrapper
+    -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile.
+    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+                These files will be used in Makefile and subinclude.h
+    -i - dictionary has init method
+
+
+Example 1:
+
+   Create Portuguese stemmer
+ 
+   0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+   1. Obtain stem.{c,h} files for Portuguese
+
+      wget http://snowball.tartarus.org/portuguese/stem.c
+      wget http://snowball.tartarus.org/portuguese/stem.h
+   
+   2. Create template files for Portuguese
+
+      ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+      Note that the argument of the -p option must be *the same* as the name of
+      the stemming function in stem.c (without the _stem suffix).
+
+      A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+      directory.
+
+   3. Compile and install dictionary
+
+   cd PGSQL_SRC/contrib/dict_pt
+   make
+   make install
+
+   4. Test it 
+
+   Sample portuguese words with the stemmed forms are available
+        from http://snowball.tartarus.org/portuguese/stemmer.html
+
+   createdb testdict
+   psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+   psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+   psql -d testdict -c "select lexize('pt','bobagem');"
+    lexize  
+   ---------
+    {bobag}
+   (1 row)
+
+   Here is what I have in pg_ts_dict table
+
+   psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+    dict_name | dict_init | dict_initoption | dict_lexize |          dict_comment           
+   -----------+-----------+-----------------+-------------+---------------------------------
+    pt        |   7177806 |                 |     7159330 | Snowball stemmer for Portuguese
+   (1 row)
+
+ 
+   Note that the dictionary and the corresponding entry in the tsearch
+   configuration are now installed, and you may modify the entry using
+   plain SQL commands, for example, to specify stop words.
+
+Example 2:
+
+      a) Simple template dictionary with init method 
+
+       ./config.sh -n wow -v -i -C WOW
+
+      b) Create simple template dict (without init method):
+   ./config.sh -n wow -v  -C WOW
+
+        The same as above, but the dictionary will not have an init method.
+
+       Dictionaries obtained in a) and b) are fully working and ready
+       for use: 
+     a) lowercase input word and remove it if it is a stop word
+     b) recognizes any word
+
+      c) Simple template dictionary with source files (with init method):
+
+       ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+        Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile.
+
+        Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+        These files will be used in Makefile and subinclude.h
+
+      d) Simple template dictionary with source files (without init method):
+
+   ./config.sh -n wow -v  -c a.c -h a.h -C WOW
+
+   The same as above, but the dictionary will not have an init method.
+
+       After that you have sources in PGSQL_SRC/contrib/dict_wow and
+       you may edit them to create actual dictionary.
+
+  Please check the Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+  for the "Gendict tutorial" and additional information about dictionaries.
+\ No newline at end of file
diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh

new file mode 100755 (executable)

index 0000000..26bb542
--- /dev/null
+++ b/contrib/tsearch2/gendict/config.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+usage () {
+   echo Usage:
+   echo $0 -n DICTNAME  \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -v - be verbose'
+   echo '    -d DIR - name of directory in PGSQL_SRL/contrib (default dict_DICTNAME)'
+   echo '    -C COMMENT - dictionary comment' 
+   echo Generate Snowball stemmer:
+   echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -s - generate Snowball wrapper'
+   echo "    -p - prefix of Snowball's function, (default DICTNAME)" 
+   echo Generate template dictionary:
+   echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+   echo '    -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile.'
+   echo '    -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+   echo '                These files will be used in Makefile and subinclude.h'
+   echo '    -i - dictionary has init method'
+   exit 1;
+}
+
+dictname=
+stemmode=no
+verbose=no
+cfile=
+hfile=
+dir= 
+hasinit=no
+comment=
+prefix=
+
+while getopts n:c:C:h:d:p:vis opt
+do
+   case "$opt" in
+       v) verbose=yes;;
+       s) stemmode=yes;;
+       i) hasinit=yes;;
+       n) dictname="$OPTARG";;
+       c) cfile="$OPTARG";;
+       h) hfile="$OPTARG";;
+       d) dir="$OPTARG";;
+       C) comment="$OPTARG";;
+       p) prefix="$OPTARG";;
+       \?) usage;;
+   esac
+done
+
+[ ${#dictname} -eq 0 ] && usage
+
+dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'`
+
+if [ $stemmode = "yes" ] ; then 
+   [ ${#prefix} -eq 0 ] && prefix=$dictname
+   hasinit=yes
+   cfile="stem.c"
+   hfile="stem.h"
+fi 
+
+[ ${#dir}   -eq 0 ] && dir="dict_$dictname"
+
+if [ ${#comment} -eq 0 ]; then
+   comment=null
+else
+   comment="'$comment'"
+fi
+
+ofile=
+for f in $cfile
+do
+   f=` echo $f | sed 's#c$#o#'`
+   ofile="$ofile $f"
+done
+
+if [ $stemmode = "yes" ] ; then
+   ofile="$ofile dict_snowball.o"
+else
+   ofile="$ofile dict_tmpl.o"
+fi
+
+if [ $verbose = "yes" ]; then
+   echo Dictname: "'"$dictname"'"
+   echo Snowball stemmer: $stemmode
+   echo Has init method: $hasinit
+   [ $stemmode = "yes" ] && echo Function prefix: $prefix 
+   echo Source files: $cfile
+   echo Header files: $hfile
+   echo Object files: $ofile
+   echo Comment: $comment
+   echo Directory: ../../$dir
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build directory...  '
+if [ ! -d ../../$dir ]; then
+   if ! mkdir ../../$dir ; then 
+       echo "Can't create directory ../../$dir"
+       exit 1
+   fi 
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n 'Build Makefile...  '
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+if [ $stemmode = "yes" ] ; then
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+else
+   sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp >  ../../$dir/Makefile 
+fi
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in...  '
+if [ $hasinit = "yes" ]; then
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+   if [ $stemmode = "yes" ] ; then
+       sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   else
+       sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+   fi
+   rm ../../$dir/dict_$dictname.sql.in.tmp 
+else 
+   sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+   [ $verbose = "yes" ] && echo -n 'Copy source and header files...  '
+   if [ ${#cfile} -ne 0 ] ; then
+       if ! cp $cfile ../../$dir ; then 
+           echo "Cant cp all or one of files: $cfile"
+           exit 1
+       fi
+   fi
+   if [ ${#hfile} -ne 0 ] ; then 
+       if ! cp $hfile ../../$dir ; then 
+               echo "Cant cp all or one of files: $hfile"
+           exit 1
+       fi
+   fi
+   [ $verbose = "yes" ] && echo ok
+fi
+
+
+# Write subinclude.h: one #include line per header file in $hfile.
+# The file is truncated with the ":" no-op rather than "echo -n", which on
+# shells without -n support would write a literal "-n" line into the header.
+[ $verbose = "yes" ] && printf '%s' 'Build sub-include header...  '
+: > ../../$dir/subinclude.h
+for i in $hfile
+do
+   echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+if  [ $stemmode = "yes" ] ; then 
+   [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+   [ $verbose = "yes" ] && echo -n 'Build dictinonary...  '
+   sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+   if [ $hasinit = "yes" ]; then
+       sed s#^HASINIT## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+   else 
+       sed s#^HASINIT.*\$## <  ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+   fi
+   rm ../../$dir/dict_tmpl.c.tmp
+fi 
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n "Build README.$dictname...  "
+if  [ $stemmode = "yes" ] ; then
+   echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname
+else
+   echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname
+fi
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+
diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN

new file mode 100644 (file)

index 0000000..10ef6f1
--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_snowball.c.IN
@@ -0,0 +1,52 @@
+/* 
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/ 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/*
+ * Per-dictionary state built by the init function below and returned to
+ * the caller as an opaque pointer.
+ */
+typedef struct {
+   struct SN_env *z;           /* Snowball environment from the stemmer's create_env() */
+   StopList    stoplist;       /* stop words, filled when an init option is given */
+   int (*stem)(struct SN_env * z); /* the stemmer's stem function */
+} DictSnowball;
+
+
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * Init method of the generated Snowball dictionary.
+ *
+ * Argument 0 (text, may be NULL) is handed to readstoplist() to load the
+ * stop-word list.  Returns a pointer to a malloc'ed DictSnowball holding
+ * the stop list, a freshly created stemmer environment and the stemmer
+ * function.  Raises ERROR when memory cannot be obtained.
+ */
+Datum
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+   DictSnowball    *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+   if ( !d )
+       elog(ERROR, "No memory");
+   memset(d,0,sizeof(DictSnowball));
+   d->stoplist.wordop=lowerstr;
+
+   if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+       text       *in = PG_GETARG_TEXT_P(0);
+       readstoplist(in, &(d->stoplist));
+       sortstoplist(&(d->stoplist));
+       PG_FREE_IF_COPY(in, 0);
+   }
+
+   d->z = CFG_PREFIX_create_env();
+   if (!d->z) {
+       /*
+        * d comes from malloc(), so it is not reclaimed by error cleanup;
+        * release it, together with the stop list, before reporting.
+        */
+       freestoplist(&(d->stoplist));
+       free(d);
+       elog(ERROR,"No memory");
+   }
+   d->stem=CFG_PREFIX_stem;
+
+   PG_RETURN_POINTER(d);
+}
+
+
diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN

new file mode 100644 (file)

index 0000000..10c0381
--- /dev/null
+++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN
@@ -0,0 +1,64 @@
+/* 
+ * example of dictionary 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+/*
+ * Template note: lines that start with the HASINIT marker are emitted only
+ * when the dictionary is generated with an init method; config.sh strips
+ * the marker in that case and empties those lines otherwise.
+ */
+HASINIT typedef struct {
+HASINIT    StopList    stoplist;   /* stop words loaded by the init method */
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT Datum 
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT 
+HASINIT    if ( !d )
+HASINIT        elog(ERROR, "No memory");
+HASINIT    memset(d,0,sizeof(DictExample));
+HASINIT 
+HASINIT    d->stoplist.wordop=lowerstr;
+HASINIT    
+HASINIT    /* Your INIT code */
+HASINIT    
+HASINIT    if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {   /* init option given: load stop words from it */
+HASINIT        text       *in = PG_GETARG_TEXT_P(0);
+HASINIT        readstoplist(in, &(d->stoplist));
+HASINIT        sortstoplist(&(d->stoplist));
+HASINIT        PG_FREE_IF_COPY(in, 0);
+HASINIT    }
+HASINIT 
+HASINIT    PG_RETURN_POINTER(d);   /* malloc'ed state, handed back as an opaque pointer */
+HASINIT }
+
+/*
+ * Lexize method of the generated dictionary.
+ *
+ * Argument 1 points to the input word and argument 2 is its length; the
+ * word is copied with pnstrdup().  The result is a palloc'ed array of
+ * strings terminated by a NULL entry.  In the variant with an init method
+ * (argument 0 is then the DictExample built by it), an empty word or a
+ * word found in the stop list yields an array whose first entry is NULL;
+ * otherwise the copied word is returned as the only entry.
+ */
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT    DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+   char       *in = (char*)PG_GETARG_POINTER(1);
+   char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+   char    **res=palloc(sizeof(char*)*2);
+
+   /* Your LEXIZE dictionary code */
+HASINIT    if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT        pfree(txt);
+HASINIT        res[0]=NULL;
+HASINIT    } else 
+       res[0]=txt;
+   res[1]=NULL;
+
+   PG_RETURN_POINTER(res);
+}
diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN

new file mode 100644 (file)

index 0000000..ff0d842
--- /dev/null
+++ b/contrib/tsearch2/gendict/sql.IN
@@ -0,0 +1,26 @@
+-- SQL template for registering a generated dictionary.
+-- Lines carrying a marker prefix are alternatives: config.sh removes the
+-- prefix of the selected variant and empties the lines of the other one.
+SET search_path = public;
+BEGIN;
+
+-- Init function; present only for dictionaries that have an init method.
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT         returns internal
+HASINIT         as 'MODULE_PATHNAME'
+HASINIT         language 'C';
+
+-- Own lexize function; present only for non-Snowball dictionaries.
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL        returns internal
+NOSNOWBALL        as 'MODULE_PATHNAME'
+NOSNOWBALL        language 'C'
+NOSNOWBALL        with (isstrict);
+
+-- Register the dictionary: name, init function (or null), a null third
+-- value, lexize function and comment.
+insert into pg_ts_dict select
+        'CFG_MODNAME',
+HASINIT        (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT        null,
+        null,
+ISSNOWBALL        (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL        (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+        CFG_COMMENT
+;
+
+
+END;
diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c

new file mode 100644 (file)

index 0000000..5a34f74
--- /dev/null
+++ b/contrib/tsearch2/gistidx.c
@@ -0,0 +1,686 @@
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+/*
+ * Version-1 call-convention declarations and prototypes for the SQL-callable
+ * functions of this file: the gtsvector input/output stubs and the index
+ * support functions (compress, decompress, consistent, union, same,
+ * penalty, picksplit).
+ */
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum      gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum      gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum      gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum      gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum      gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum      gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum      gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum      gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum      gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/*
+ * GETENTRY: treat the data area of the varlena "vec" as an array of
+ * GISTENTRY and return the key of element "pos" as a GISTTYPE pointer.
+ */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/*
+ * SUMBIT: sum of GETBITBYTE(val, n) for n = 0..7.  GETBITBYTE is defined
+ * elsewhere; presumably it extracts bit n, which makes this the number of
+ * set bits in one byte.
+ */
+#define SUMBIT(val) (       \
+   GETBITBYTE(val,0) + \
+   GETBITBYTE(val,1) + \
+   GETBITBYTE(val,2) + \
+   GETBITBYTE(val,3) + \
+   GETBITBYTE(val,4) + \
+   GETBITBYTE(val,5) + \
+   GETBITBYTE(val,6) + \
+   GETBITBYTE(val,7)   \
+)
+
+
+/*
+ * Input function of the gtsvector type: not supported, always raises ERROR.
+ */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * Output function of the gtsvector type: not supported, always raises ERROR.
+ */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Not implemented");
+   PG_RETURN_DATUM(0);
+}
+
+/*
+ * qsort() comparator ordering int4 values ascending.
+ */
+static int
+compareint(const void *a, const void *b)
+{
+   int4        lhs = *((int4 *) a);
+   int4        rhs = *((int4 *) b);
+
+   if (lhs > rhs)
+       return 1;
+   if (lhs < rhs)
+       return -1;
+   return 0;
+}
+
+/*
+ * Sort the array "a" of "l" elements in place and squeeze out duplicate
+ * values.  Returns the number of distinct values, which end up in
+ * a[0 .. result-1].
+ *
+ * Arrays with fewer than two elements are returned unchanged.  The empty
+ * array must be handled here as well: the compaction below starts from
+ * "one element kept" and would otherwise report 1 for an empty input.
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+   int4       *ptr,
+              *res;
+
+   if (l <= 1)
+       return l;
+
+   ptr = res = a;
+
+   qsort((void *) a, l, sizeof(int4), compareint);
+
+   /* res marks the last value kept so far, ptr scans the sorted input */
+   while (ptr - a < l)
+       if (*ptr != *res)
+           *(++res) = *ptr++;
+       else
+           ptr++;
+   return res + 1 - a;
+}
+
+/*
+ * Build a signature from an array key: clear "sign", then apply HASH()
+ * to it for every element of the array stored in "a".
+ */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+   int4       *hashes = GETARR(a);
+   int4        nhashes = ARRNELEM(a);
+   int4        idx;
+
+   MemSet((void *) sign, 0, sizeof(BITVEC));
+
+   for (idx = 0; idx < nhashes; idx++)
+       HASH(sign, hashes[idx]);
+}
+
+/*
+ * Compress support function.
+ *
+ * For a leaf key (a tsvector) it builds an array key holding the sorted,
+ * de-duplicated CRC32 values of the words; if that key is larger than
+ * TOAST_INDEX_TARGET it is replaced by a signature key.  For a non-leaf
+ * signature key whose bytes are all 0xff it substitutes a key flagged
+ * ALLISTRUE.  In every other case the entry is returned unchanged.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTENTRY  *retval = entry;
+
+   if (entry->leafkey)
+   {                           /* tsvector */
+       GISTTYPE   *res;
+       tsvector       *toastedval = (tsvector *) DatumGetPointer(entry->key);
+       tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+       int4        len;
+       int4       *arr;
+       WordEntry  *ptr = ARRPTR(val);
+       char       *words = STRPTR(val);
+
+       /* allocate an array key with room for one hash per word */
+       len = CALCGTSIZE(ARRKEY, val->size);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = ARRKEY;
+       arr = GETARR(res);
+       len = val->size;
+       /* hash every word of the tsvector */
+       while (len--)
+       {
+           *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+           arr++;
+           ptr++;
+       }
+
+       /* sort the hashes and drop duplicates */
+       len = uniqueint(GETARR(res), val->size);
+       if (len != val->size)
+       {
+           /*
+            * there is a collision of hash-function; len is always less
+            * than val->size
+            */
+           len = CALCGTSIZE(ARRKEY, len);
+           res = (GISTTYPE *) repalloc((void *) res, len);
+           res->len = len;
+       }
+       /* free the detoasted copy, if detoasting made one */
+       if (val != toastedval)
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if (res->len > TOAST_INDEX_TARGET)
+       {
+           GISTTYPE   *ressign;
+
+           len = CALCGTSIZE(SIGNKEY, 0);
+           ressign = (GISTTYPE *) palloc(len);
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign(GETSIGN(ressign), res);
+           pfree(res);
+           res = ressign;
+       }
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+            !ISALLTRUE(DatumGetPointer(entry->key)))
+   {
+       int4        i,
+                   len;
+       GISTTYPE   *res;
+       BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
+
+       /*
+        * Return the entry as is as soon as a byte that is not 0xff is
+        * found.  LOOPBYTE is defined elsewhere; presumably it iterates i
+        * over the bytes of the signature.
+        */
+       LOOPBYTE(
+                if ((sign[i] & 0xff) != 0xff)
+                PG_RETURN_POINTER(retval);
+       );
+
+       /* every byte is 0xff: replace the key by one flagged ALLISTRUE */
+       len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       res = (GISTTYPE *) palloc(len);
+       res->len = len;
+       res->flag = SIGNKEY | ALLISTRUE;
+
+       retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+       gistentryinit(*retval, PointerGetDatum(res),
+                     entry->rel, entry->page,
+                     entry->offset, res->len, FALSE);
+   }
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * Decompress support function: detoast the key.  When detoasting did not
+ * produce a new copy the incoming entry is returned as is; otherwise a
+ * new entry wrapping the detoasted key is built.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+   GISTENTRY  *retval;
+
+   if (key == (GISTTYPE *) DatumGetPointer(entry->key))
+       PG_RETURN_POINTER(entry);
+
+   retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+   gistentryinit(*retval, PointerGetDatum(key),
+                 entry->rel, entry->page,
+                 entry->offset, key->len, FALSE);
+
+   PG_RETURN_POINTER(retval);
+}
+
+/*
+ * Half-open range [arrb, arre) of sorted int4 values, passed to
+ * checkcondition_arr() as its opaque argument.
+ */
+typedef struct
+{
+   int4       *arrb;
+   int4       *arre;
+}  CHKVAL;
+
+/*
+ * Binary search: is val->val present in the array described by checkval?
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+   CHKVAL     *range = (CHKVAL *) checkval;
+   int4       *lo = range->arrb;
+   int4       *hi = range->arre;
+
+   /* the wanted value, if present at all, lies within [lo, hi) */
+   while (lo < hi)
+   {
+       int4       *mid = lo + (hi - lo) / 2;
+
+       if (*mid == val->val)
+           return (true);
+
+       if (*mid < val->val)
+           lo = mid + 1;
+       else
+           hi = mid;
+   }
+
+   return (false);
+}
+
+/*
+ * Signature counterpart of checkcondition_arr(): checkval is used as a bit
+ * vector and the bit selected by HASHVAL(val->val) is returned.
+ */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+   return GETBIT(checkval, HASHVAL(val->val));
+}
+
+/*
+ * Consistent support function: evaluate the query (argument 1) against
+ * the key of the index entry (argument 0).  An empty query never matches
+ * and an ALLISTRUE signature always does; otherwise TS_execute() decides,
+ * testing query values against the signature bits or, for array keys
+ * (only leaf pages), against the sorted hash array.
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+   GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(entry->key);
+   CHKVAL      chkval;
+
+   if (!query->size)
+       PG_RETURN_BOOL(false);
+
+   if (!ISSIGNKEY(key))
+   {
+       /* array key: only leaf pages */
+       chkval.arrb = GETARR(key);
+       chkval.arre = chkval.arrb + ARRNELEM(key);
+       PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                                 (void *) &chkval, true,
+                                 checkcondition_arr));
+   }
+
+   if (ISALLTRUE(key))
+       PG_RETURN_BOOL(true);
+
+   PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+                             (void *) GETSIGN(key), false,
+                             checkcondition_bit));
+}
+
+/*
+ * Merge the key "add" into the signature "sbase".  Returns 1 without
+ * touching sbase when add is an ALLISTRUE signature, 0 otherwise.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+   int4        i;
+
+   if (!ISSIGNKEY(add))
+   {
+       /* array key: hash each element into the signature */
+       int4       *hashes = GETARR(add);
+
+       for (i = 0; i < ARRNELEM(add); i++)
+           HASH(sbase, hashes[i]);
+       return 0;
+   }
+
+   if (ISALLTRUE(add))
+       return 1;
+
+   {
+       /* ordinary signature: OR it in byte by byte */
+       BITVECP     sadd = GETSIGN(add);
+
+       LOOPBYTE(
+                sbase[i] |= sadd[i];
+       );
+   }
+   return 0;
+}
+
+
+/*
+ * Union support function: fold all keys of the entry vector (argument 0)
+ * into a single signature key.  The result is flagged ALLISTRUE, and
+ * carries no signature bytes, as soon as one input key is ALLISTRUE.
+ * The size of the result is stored through argument 1.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   int        *size = (int *) PG_GETARG_POINTER(1);
+   int4        nentries = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+   int4        flag = SIGNKEY;
+   int4        keysize;
+   int4        idx;
+   BITVEC      base;
+   GISTTYPE   *result;
+
+   MemSet((void *) base, 0, sizeof(BITVEC));
+
+   idx = 0;
+   while (idx < nentries)
+   {
+       if (unionkey(base, GETENTRY(entryvec, idx)))
+       {
+           flag |= ALLISTRUE;
+           break;
+       }
+       idx++;
+   }
+
+   keysize = CALCGTSIZE(flag, 0);
+   result = (GISTTYPE *) palloc(keysize);
+   result->len = keysize;
+   *size = result->len;
+   result->flag = flag;
+
+   if (!ISALLTRUE(result))
+       memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * Same support function: store in *result (argument 2) whether the keys a
+ * and b are equal, and return that pointer.  The kind of comparison is
+ * chosen from a alone; b is taken to be of the same kind.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+   GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+   GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+   bool       *result = (bool *) PG_GETARG_POINTER(2);
+
+   if (ISSIGNKEY(a))
+   {                           /* then b also ISSIGNKEY */
+       /* two ALLISTRUE keys are equal; ALLISTRUE never equals a plain one */
+       if (ISALLTRUE(a) && ISALLTRUE(b))
+           *result = true;
+       else if (ISALLTRUE(a))
+           *result = false;
+       else if (ISALLTRUE(b))
+           *result = false;
+       else
+       {
+           int4        i;
+           BITVECP     sa = GETSIGN(a),
+                       sb = GETSIGN(b);
+
+           /*
+            * Compare the signatures byte by byte and stop at the first
+            * difference.  NOTE(review): the break relies on LOOPBYTE
+            * (defined elsewhere) expanding to a loop over i - confirm.
+            */
+           *result = true;
+           LOOPBYTE(
+                    if (sa[i] != sb[i])
+                    {
+               *result = false;
+               break;
+           }
+           );
+       }
+   }
+   else
+   {                           /* a and b ISARRKEY */
+       int4        lena = ARRNELEM(a),
+                   lenb = ARRNELEM(b);
+
+       /* arrays of different length cannot be equal */
+       if (lena != lenb)
+           *result = false;
+       else
+       {
+           int4       *ptra = GETARR(a),
+                      *ptrb = GETARR(b);
+           int4        i;
+
+           /* element-wise comparison, stopping at the first mismatch */
+           *result = true;
+           for (i = 0; i < lena; i++)
+               if (ptra[i] != ptrb[i])
+               {
+                   *result = false;
+                   break;
+               }
+       }
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * Sum SUMBIT() over the signature: each LOOPBYTE iteration adds SUMBIT of
+ * the byte that "sign" points to and then advances "sign" by one byte.
+ * Presumably this is the number of set bits in the signature (LOOPBYTE and
+ * GETBITBYTE are defined elsewhere).
+ */
+static int4
+sizebitvec(BITVECP sign)
+{
+   int4        size = 0,
+               i;
+
+   LOOPBYTE(
+       size += SUMBIT(*(char *) sign);
+       sign = (BITVECP) (((char *) sign) + 1);
+   );
+   return size;
+}
+
+/*
+ * Count the positions i visited by LOOPBIT at which GETBIT(a,i) and
+ * GETBIT(b,i) differ, i.e. a Hamming-style distance of two signatures.
+ */
+static int
+hemdistsign(BITVECP  a, BITVECP b) {
+   int i,dist=0;
+
+   LOOPBIT(
+       if ( GETBIT(a,i) != GETBIT(b,i) )
+           dist++;
+   );
+   return dist;
+}
+
+/*
+ * Distance between two signature keys.  An ALLISTRUE key carries no
+ * signature: two of them are at distance 0, and against an ordinary key
+ * the distance is SIGLENBIT minus the sizebitvec() of that key's
+ * signature.  Two ordinary keys are compared with hemdistsign().
+ */
+static int
+hemdist(GISTTYPE   *a, GISTTYPE   *b) {
+   if ( ISALLTRUE(a) )
+       return ISALLTRUE(b) ? 0 : SIGLENBIT-sizebitvec(GETSIGN(b));
+
+   if ( ISALLTRUE(b) )
+       return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+   return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * Penalty support function: stores in the float pointed to by argument 2
+ * the cost of adding newentry's key under origentry's key, and returns
+ * that pointer.
+ *
+ * The original key is always a signature key.  An array key is first
+ * converted to a signature with makesign(); a signature key is compared
+ * directly with hemdist().
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+   GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+   GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+   float      *penalty = (float *) PG_GETARG_POINTER(2);
+   GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+   GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+
+   if (!ISARRKEY(newval))
+   {
+       /* both keys are signatures */
+       *penalty = hemdist(origval, newval);
+   }
+   else
+   {
+       BITVEC      newsign;
+
+       makesign(newsign, newval);
+
+       if (ISALLTRUE(origval))
+       {
+           /* all-true original: scaled count of bits missing in the new key */
+           *penalty = ((float)(SIGLENBIT-sizebitvec(newsign)))/(float)(SIGLENBIT+1);
+       }
+       else
+           *penalty = hemdistsign(newsign, GETSIGN(origval));
+   }
+
+   PG_RETURN_POINTER(penalty);
+}
+
+/*
+ * Per-entry signature cache used while splitting a page: either the key
+ * is all-true (sign is then not filled in) or sign holds its signature.
+ */
+typedef struct
+{
+   bool        allistrue;
+   BITVEC      sign;
+}  CACHESIGN;
+
+/*
+ * Fill one cache slot from an index key: array keys are converted with
+ * makesign(), all-true keys only set the flag, and ordinary signature keys
+ * have their signature copied.
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+   if (ISARRKEY(key))
+   {
+       item->allistrue = false;
+       makesign(item->sign, key);
+       return;
+   }
+
+   item->allistrue = ISALLTRUE(key) ? true : false;
+   if (!item->allistrue)
+       memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+}
+
+/* -(a-b)^3 * c, as a double; used as a balance term when distributing entries */
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+
+/* An entry's page offset paired with its sort cost for the split */
+typedef struct
+{
+   OffsetNumber pos;
+   int4        cost;
+} SPLITCOST;
+
+/* qsort() comparator ordering SPLITCOST items by ascending cost */
+static int
+comparecost(const void *a, const void *b)
+{
+   int4        costa = ((const SPLITCOST *) a)->cost;
+   int4        costb = ((const SPLITCOST *) b)->cost;
+
+   if (costa > costb)
+       return 1;
+   if (costa < costb)
+       return -1;
+   return 0;
+}
+
+
+/*
+ * Same distance as hemdist(), but computed on cached signatures: two
+ * all-true items are at distance 0, an all-true item is SIGLENBIT minus
+ * sizebitvec() away from an ordinary one, and two ordinary items are
+ * compared with hemdistsign().
+ */
+static int
+hemdistcache(CACHESIGN *a, CACHESIGN *b)
+{
+   if (a->allistrue && b->allistrue)
+       return 0;
+
+   if (a->allistrue)
+       return SIGLENBIT-sizebitvec(b->sign);
+
+   if (b->allistrue)
+       return SIGLENBIT-sizebitvec(a->sign);
+
+   return hemdistsign(a->sign, b->sign);
+}
+
+/*
+ * PickSplit support function: distribute the entries of an overflowing
+ * page between a left and a right page.
+ *
+ * Argument 0 is the varlena vector of GISTENTRYs, argument 1 the
+ * GIST_SPLITVEC to fill in (also the return value).  The steps are:
+ *   1. cache a signature for every entry (see fillcache());
+ *   2. pick as seeds the two entries in FirstOffsetNumber..maxoff that are
+ *      farthest apart according to hemdistcache();
+ *   3. sort all entries by |dist(seed_1) - dist(seed_2)|, ascending;
+ *   4. assign each entry to the side whose union signature is closer,
+ *      with WISH_F() as a correction that depends on how many entries each
+ *      side already holds, OR-ing its signature into that side's union.
+ * spl_ldatum/spl_rdatum receive the resulting union keys.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+   bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
+   GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+   OffsetNumber k,
+               j;
+   GISTTYPE   *datum_l,
+              *datum_r;
+   BITVECP     union_l,
+               union_r;
+   int4        size_alpha,
+               size_beta;
+   int4        size_waste,
+               waste = -1;
+   int4        nbytes;
+   OffsetNumber seed_1 = 0,
+               seed_2 = 0;
+   OffsetNumber *left,
+              *right;
+   OffsetNumber maxoff;
+   BITVECP     ptr;
+   int         i;
+   CACHESIGN  *cache;
+   SPLITCOST  *costvector;
+
+   maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+   nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+   v->spl_left = (OffsetNumber *) palloc(nbytes);
+   v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+   cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+   fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+   /* choose the two most distant entries as seeds, filling the cache on the first pass */
+   for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+       for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+           if (k == FirstOffsetNumber)
+               fillcache(&cache[j], GETENTRY(entryvec, j));
+
+           size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+           if (size_waste > waste) {
+               waste = size_waste;
+               seed_1 = k;
+               seed_2 = j;
+           }
+       }
+   }
+
+   left = v->spl_left;
+   v->spl_nleft = 0;
+   right = v->spl_right;
+   v->spl_nright = 0;
+
+   /* no pair was examined: fall back to the first two entries */
+   if (seed_1 == 0 || seed_2 == 0) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
+   /* form initial union keys from the seeds */
+   if (cache[seed_1].allistrue) {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_l->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_l->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+   }
+   if (cache[seed_2].allistrue) {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+       datum_r->flag = SIGNKEY | ALLISTRUE;
+   } else {
+       datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+       datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+       datum_r->flag = SIGNKEY;
+       memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+   }
+
+   union_l=GETSIGN(datum_l);
+   union_r=GETSIGN(datum_r);
+   /* from here on maxoff also covers the last entry of the vector */
+   maxoff = OffsetNumberNext(maxoff);
+   fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+   /* sort entries by how little they prefer one seed over the other */
+   costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+   for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+       costvector[j - 1].pos = j;
+       size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+       size_beta  = hemdistcache(&(cache[seed_2]), &(cache[j]));
+       costvector[j - 1].cost = abs(size_alpha - size_beta);
+   }
+   qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+   for (k = 0; k < maxoff; k++) {
+       j = costvector[k].pos;
+       if (j == seed_1) {
+           *left++ = j;
+           v->spl_nleft++;
+           continue;
+       } else if (j == seed_2) {
+           *right++ = j;
+           v->spl_nright++;
+           continue;
+       }
+
+       /* distance from entry j to the current left union */
+       if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+               size_alpha=0;
+           else
+               /*
+                * cache[j].sign is already a bare signature; applying
+                * GETSIGN() to it would skip GTHDRSIZE bytes and read past
+                * the end of the array.
+                */
+               size_alpha = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign
+               );
+       } else {
+           size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+       }
+
+       /* distance from entry j to the current right union */
+       if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+           if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+               size_beta=0;
+           else
+               /* see above: use the cached signature directly */
+               size_beta = SIGLENBIT-sizebitvec(
+                   ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign
+               );
+       } else {
+           size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+       }
+
+       if (size_alpha  < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+           if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+               /* union with an all-true entry: set every bit of a stored signature */
+               if (! ISALLTRUE(datum_l) )
+                   MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_l[i] |= ptr[i];
+               );
+           }
+           *left++ = j;
+           v->spl_nleft++;
+       } else {
+           if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+               if (! ISALLTRUE(datum_r) )
+                   MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+           } else {
+               ptr=cache[j].sign;
+               LOOPBYTE(
+                   union_r[i] |= ptr[i];
+               );
+           }
+           *right++ = j;
+           v->spl_nright++;
+       }
+   }
+
+   /* terminate both offset lists */
+   *right = *left = FirstOffsetNumber;
+   pfree(costvector);
+   pfree(cache);
+   v->spl_ldatum = PointerGetDatum(datum_l);
+   v->spl_rdatum = PointerGetDatum(datum_r);
+
+   PG_RETURN_POINTER(v);
+}
diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h

new file mode 100644 (file)

index 0000000..d081c74
--- /dev/null
+++ b/contrib/tsearch2/gistidx.h
@@ -0,0 +1,67 @@
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ */
+
+#define BITBYTE 8
+#define SIGLENINT  63          /* >121 => key will toast, so it will not
+                                * work !!! */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+#define LOOPBYTE(a) \
+       for(i=0;i
+               a;\
+       }
+#define LOOPBIT(a) \
+               for(i=0;i
+                               a;\
+               }
+
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+#define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ */
+typedef struct
+{
+   int4        len;
+   int4        flag;
+   char        data[1];
+}  GISTTYPE;
+
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+#define ISARRKEY(x) ( ((GISTTYPE*)x)->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)x)->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)x)->flag & ALLISTRUE )
+
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+#define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)x+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)x)->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif


diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <ctype.h>
+
+#include "postgres.h"
+
+#include "spell.h"
+
+/* longest word (in bytes) that NormalizeWord() will process */
+#define MAXNORMLEN 56
+
+/* case-insensitive test whether x starts with y (0 when it does) */
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/* qsort() comparator: order SPELL entries by their word, using strcmp() */
+static int
+cmpspell(const void *s1, const void *s2)
+{
+   const SPELL *left = (const SPELL *) s1;
+   const SPELL *right = (const SPELL *) s2;
+
+   return strcmp(left->word, right->word);
+}
+
+/* Convert a NUL-terminated string to lower case in place, byte by byte. */
+static void
+strlower(char *str)
+{
+   unsigned char *p;
+
+   for (p = (unsigned char *) str; *p != '\0'; p++)
+       *p = tolower(*p);
+}
+
+/*
+ * Backward string compare for suffix tree operations: compares s1 and s2
+ * character by character starting from their last characters.  When one
+ * string is a proper suffix of the other, the shorter one sorts first.
+ * Returns -1, 0 or 1.
+ */
+static int
+strbcmp(const char *s1, const char *s2)
+{
+   int         i1 = strlen(s1) - 1;
+   int         i2 = strlen(s2) - 1;
+
+   for (; i1 >= 0 && i2 >= 0; i1--, i2--)
+   {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   if (i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+/*
+ * Like strbcmp(), but compares at most "count" characters counted from the
+ * ends of the strings.  Returns 0 as soon as "count" characters have
+ * matched.
+ */
+static int
+strbncmp(const char *s1, const char *s2, size_t count)
+{
+   int         i1 = strlen(s1) - 1;
+   int         i2 = strlen(s2) - 1;
+   int         remaining = count;
+
+   for (; i1 >= 0 && i2 >= 0 && remaining > 0; i1--, i2--, remaining--)
+   {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   if (remaining == 0 || i1 == i2)
+       return 0;
+   return (i1 < i2) ? -1 : 1;
+}
+
+/*
+ * qsort() comparator for AFFIX entries: order by type first; within type
+ * 'p' compare repl forwards with strcmp(), within any other type compare
+ * repl backwards with strbcmp().
+ */
+static int
+cmpaffix(const void *s1, const void *s2)
+{
+   const AFFIX *a = (const AFFIX *) s1;
+   const AFFIX *b = (const AFFIX *) s2;
+
+   if (a->type != b->type)
+       return (a->type < b->type) ? -1 : 1;
+
+   if (a->type == 'p')
+       return strcmp(a->repl, b->repl);
+
+   return strbcmp(a->repl, b->repl);
+}
+
+/*
+ * Append one word with its affix flags to Conf->Spell.
+ *
+ * The array grows in steps of 1024*20 entries.  The word is copied with
+ * strdup(); the flag string is copied into the fixed-size flag field and
+ * truncated if it does not fit.  Raises elog(ERROR) on out of memory.
+ * Always returns 0.
+ */
+int
+AddSpell(IspellDict * Conf, const char *word, const char *flag)
+{
+   SPELL      *entry;
+
+   if (Conf->nspell >= Conf->mspell)
+   {
+       int         newsize = Conf->mspell + 1024 * 20;
+       SPELL      *grown;
+
+       if (Conf->mspell)
+           grown = (SPELL *) realloc(Conf->Spell, newsize * sizeof(SPELL));
+       else
+           grown = (SPELL *) malloc(newsize * sizeof(SPELL));
+
+       /* leave Conf untouched on failure so the old array is still owned */
+       if (grown == NULL)
+           elog(ERROR, "No memory for AddSpell");
+
+       Conf->Spell = grown;
+       Conf->mspell = newsize;
+   }
+
+   entry = &Conf->Spell[Conf->nspell];
+   entry->word = strdup(word);
+   if (!entry->word)
+       elog(ERROR, "No memory for AddSpell");
+
+   /*
+    * strncpy() does not terminate the destination when the source fills
+    * it, and the flags are later searched with strchr(); always leave room
+    * for, and write, the terminating NUL.
+    */
+   strncpy(entry->flag, flag, sizeof(entry->flag) - 1);
+   entry->flag[sizeof(entry->flag) - 1] = '\0';
+
+   Conf->nspell++;
+   return (0);
+}
+
+
+/*
+ * Read a dictionary file with one "word/flags" entry per line and add
+ * every entry to Conf with AddSpell().
+ *
+ * The flags are the run of ASCII letters following the first '/'; a line
+ * without '/' gets an empty flag string.  The word is lower-cased and any
+ * CR/LF in it is replaced by a terminator.  Returns 1 if the file cannot
+ * be opened, 0 otherwise.
+ */
+int
+ImportDictionary(IspellDict * Conf, const char *filename)
+{
+   unsigned char line[BUFSIZ];
+   FILE       *fp = fopen(filename, "r");
+
+   if (fp == NULL)
+       return (1);
+
+   while (fgets(line, sizeof(line), fp))
+   {
+       const unsigned char *flag = "";
+       unsigned char *p = strchr(line, '/');
+
+       if (p)
+       {
+           /* cut the word at the slash; the flags start right after it */
+           *p++ = 0;
+           flag = p;
+
+           /* the flag string ends at the first non-letter */
+           while (*p && (((*p >= 'A') && (*p <= 'Z')) || ((*p >= 'a') && (*p <= 'z'))))
+               p++;
+           *p = 0;
+       }
+
+       strlower(line);
+
+       /* blank out line-end characters in the word part */
+       for (p = line; *p; p++)
+       {
+           if (*p == '\r' || *p == '\n')
+               *p = 0;
+       }
+
+       AddSpell(Conf, line, flag);
+   }
+
+   fclose(fp);
+   return (0);
+}
+
+
+/* True when no flag is required (0) or the entry's flag string holds it. */
+static int
+spell_has_flag(const SPELL *sp, int affixflag)
+{
+   return (affixflag == 0) || (strchr(sp->flag, affixflag) != NULL);
+}
+
+/*
+ * Look up "word" in the sorted dictionary, restricted to the index range
+ * recorded in SpellTree for the word's first byte.  When affixflag is
+ * non-zero the entry must also carry that flag.
+ *
+ * Each round probes the middle and both ends of the current range, then
+ * narrows the range from both sides.  Returns the matching entry or NULL.
+ */
+static SPELL *
+FindWord(IspellDict * Conf, const char *word, int affixflag)
+{
+   int         first = (int) (*word) & 255;
+   int         lo = Conf->SpellTree.Left[first];
+   int         hi = Conf->SpellTree.Right[first];
+   int         mid;
+   int         cmpmid;
+
+   if (lo == -1)
+       return (NULL);
+
+   while (lo <= hi)
+   {
+       mid = (lo + hi) >> 1;
+
+       cmpmid = strcmp(Conf->Spell[mid].word, word);
+       if (cmpmid == 0 && spell_has_flag(&Conf->Spell[mid], affixflag))
+           return (&Conf->Spell[mid]);
+
+       if (strcmp(Conf->Spell[lo].word, word) == 0 &&
+           spell_has_flag(&Conf->Spell[lo], affixflag))
+           return (&Conf->Spell[lo]);
+
+       if (strcmp(Conf->Spell[hi].word, word) == 0 &&
+           spell_has_flag(&Conf->Spell[hi], affixflag))
+           return (&Conf->Spell[hi]);
+
+       if (cmpmid < 0)
+       {
+           lo = mid + 1;
+           hi--;
+       }
+       else if (cmpmid > 0)
+       {
+           hi = mid - 1;
+           lo++;
+       }
+       else
+       {
+           /* same word but wrong flag: keep looking at its neighbours */
+           lo++;
+           hi--;
+       }
+   }
+
+   return (NULL);
+}
+
+/*
+ * Append one affix rule to Conf->Affix.
+ *
+ * flag  - affix flag character
+ * mask  - regular expression the stripped word must match; stored with a
+ *         trailing '$' for suffixes (type 's') or a leading '^' otherwise
+ * find  - text put back in place of the affix
+ * repl  - the affix text itself
+ * type  - 's' for a suffix rule, otherwise a prefix rule
+ *
+ * The array grows in steps of 16.  Raises elog(ERROR) on out of memory or
+ * when a string does not fit its fixed-size field.  Always returns 0.
+ */
+int
+AddAffix(IspellDict * Conf, int flag, const char *mask, const char *find, const char *repl, int type)
+{
+   AFFIX      *affix;
+
+   if (Conf->naffixes >= Conf->maffixes)
+   {
+       int         newsize = Conf->maffixes + 16;
+       AFFIX      *grown;
+
+       if (Conf->maffixes)
+           grown = (AFFIX *) realloc((void *) Conf->Affix, newsize * sizeof(AFFIX));
+       else
+           grown = (AFFIX *) malloc(newsize * sizeof(AFFIX));
+
+       /* leave Conf untouched on failure so the old array is still owned */
+       if (grown == NULL)
+           elog(ERROR, "No memory for AddAffix");
+
+       Conf->Affix = grown;
+       Conf->maffixes = newsize;
+   }
+
+   affix = &Conf->Affix[Conf->naffixes];
+
+   /*
+    * The strings come straight from the affix file; refuse anything that
+    * would overflow the fixed-size fields.  The mask needs one extra byte
+    * for its anchor character plus the terminating NUL.
+    */
+   if (strlen(mask) + 2 > sizeof(affix->mask) ||
+       strlen(find) + 1 > sizeof(affix->find) ||
+       strlen(repl) + 1 > sizeof(affix->repl))
+       elog(ERROR, "Affix parameter is too long");
+
+   if (type == 's')
+       sprintf(affix->mask, "%s$", mask);
+   else
+       sprintf(affix->mask, "^%s", mask);
+
+   affix->compile = 1;         /* regex is compiled lazily on first use */
+   affix->flag = flag;
+   affix->type = type;
+
+   strcpy(affix->find, find);
+   strcpy(affix->repl, repl);
+   affix->replen = strlen(repl);
+   Conf->naffixes++;
+   return (0);
+}
+
+/*
+ * Copy src into dist, dropping every space, '-' and tab character, and
+ * terminate the result.  Returns dist.
+ */
+static char *
+remove_spaces(char *dist, char *src)
+{
+   char       *out = dist;
+   char       *in;
+
+   for (in = src; *in != '\0'; in++)
+   {
+       if (*in == ' ' || *in == '-' || *in == '\t')
+           continue;
+       *out++ = *in;
+   }
+   *out = 0;
+
+   return (dist);
+}
+
+
+/*
+ * Read an affix file and add every rule to Conf with AddAffix().
+ *
+ * Lines starting with "suffixes" / "prefixes" switch the current section,
+ * "flag <c>" lines set the flag for the rules that follow, and rule lines
+ * have the form "mask > find , repl" (or "mask > repl").  Text after '#'
+ * is ignored.  Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int
+ImportAffixes(IspellDict * Conf, const char *filename)
+{
+   unsigned char str[BUFSIZ];
+   unsigned char flag = 0;
+   unsigned char mask[BUFSIZ] = "";
+   unsigned char find[BUFSIZ] = "";
+   unsigned char repl[BUFSIZ] = "";
+   unsigned char *s;
+   int         i;
+   int         suffixes = 0;
+   int         prefixes = 0;
+   FILE       *affix;
+
+   if (!(affix = fopen(filename, "r")))
+       return (1);
+
+   while (fgets(str, sizeof(str), affix))
+   {
+       if (!STRNCASECMP(str, "suffixes"))
+       {
+           suffixes = 1;
+           prefixes = 0;
+           continue;
+       }
+       if (!STRNCASECMP(str, "prefixes"))
+       {
+           suffixes = 0;
+           prefixes = 1;
+           continue;
+       }
+       if (!STRNCASECMP(str, "flag "))
+       {
+           s = str + 5;
+
+           /*
+            * strchr() treats the terminating NUL as part of the string, so
+            * test for the end of the line explicitly; otherwise a line
+            * consisting only of "flag" plus blanks/asterisks would make us
+            * scan past the end of the buffer contents.
+            */
+           while (*s && strchr("* ", *s))
+               s++;
+           flag = *s;
+           continue;
+       }
+       if ((!suffixes) && (!prefixes))
+           continue;
+       if ((s = strchr(str, '#')))
+           *s = 0;
+       if (!*str)
+           continue;
+       strlower(str);
+       strcpy(mask, "");
+       strcpy(find, "");
+       strcpy(repl, "");
+       i = sscanf(str, "%[^>\n]>%[^,\n],%[^\n]", mask, find, repl);
+
+       /* str is reused as scratch space to strip blanks from each field */
+       remove_spaces(str, repl);
+       strcpy(repl, str);
+       remove_spaces(str, find);
+       strcpy(find, str);
+       remove_spaces(str, mask);
+       strcpy(mask, str);
+
+       switch (i)
+       {
+           case 3:
+               break;
+           case 2:
+               /* "mask > repl": the single field is the replacement */
+               if (*find != '\0')
+               {
+                   strcpy(repl, find);
+                   strcpy(find, "");
+               }
+               break;
+           default:
+               continue;
+       }
+
+       AddAffix(Conf, (int) flag, mask, find, repl, suffixes ? 's' : 'p');
+
+   }
+   fclose(affix);
+
+   return (0);
+}
+
+/*
+ * Sort the dictionary by word and record, for every first byte value, the
+ * index of the first (Left) and last (Right) entry starting with it.
+ * Left stays -1 for byte values that start no word.
+ */
+void
+SortDictionary(IspellDict * Conf)
+{
+   int         prev = -1;
+   size_t      idx;
+
+   qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspell);
+
+   for (idx = 0; idx < 256; idx++)
+       Conf->SpellTree.Left[idx] = -1;
+
+   for (idx = 0; idx < Conf->nspell; idx++)
+   {
+       int         first = (int) (*(Conf->Spell[idx].word)) & 255;
+
+       if (first != prev)
+       {
+           /* a new run of words begins here */
+           Conf->SpellTree.Left[first] = idx;
+           prev = first;
+       }
+       Conf->SpellTree.Right[first] = idx;
+   }
+}
+
+/*
+ * Sort the affix rules with cmpaffix() and build the prefix and suffix
+ * lookup tables: for each byte value, Left/Right hold the first and last
+ * index of the rules keyed by it, or -1 when there are none.  Prefix rules
+ * (type 'p') are keyed by the first byte of repl, all other rules by the
+ * last byte of repl (0 when repl is empty).
+ */
+void
+SortAffixes(IspellDict * Conf)
+{
+   int         lastprefix = -1;
+   int         lastsuffix = -1;
+   size_t      n;
+
+   if (Conf->naffixes > 1)
+       qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
+
+   for (n = 0; n < 256; n++)
+   {
+       Conf->PrefixTree.Right[n] = -1;
+       Conf->PrefixTree.Left[n] = -1;
+       Conf->SuffixTree.Right[n] = -1;
+       Conf->SuffixTree.Left[n] = -1;
+   }
+
+   for (n = 0; n < Conf->naffixes; n++)
+   {
+       AFFIX      *aff = &(((AFFIX *) Conf->Affix)[n]);
+       Tree_struct *tree;
+       int        *last;
+       int         key;
+
+       if (aff->type == 'p')
+       {
+           key = (int) (*(aff->repl)) & 255;
+           tree = &Conf->PrefixTree;
+           last = &lastprefix;
+       }
+       else
+       {
+           key = (aff->replen) ? (int) (aff->repl[aff->replen - 1]) & 255 : 0;
+           tree = &Conf->SuffixTree;
+           last = &lastsuffix;
+       }
+
+       if (*last != key)
+       {
+           /* a new run of rules with this key begins here */
+           tree->Left[key] = n;
+           *last = key;
+       }
+       tree->Right[key] = n;
+   }
+}
+
+/*
+ * Try to undo one suffix rule on "word" (of length len).
+ *
+ * *res receives the result of comparing the end of the word with the
+ * rule's repl text.  If the word ends with repl, that ending is replaced
+ * by the rule's find text; when the result matches the rule's mask
+ * (compiled on first use) and is found in the dictionary with the rule's
+ * flag, a palloc'ed copy of it is returned.  Otherwise returns NULL.
+ */
+static char *
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf)
+{
+   regmatch_t  subs[2];        /* workaround for apache&linux */
+   char        stem[2 * MAXNORMLEN] = "";
+
+   *res = strbncmp(word, Affix->repl, Affix->replen);
+   if (*res != 0)
+       return NULL;
+
+   /* swap the suffix for the text it replaced */
+   strcpy(stem, word);
+   strcpy(stem + len - Affix->replen, Affix->find);
+
+   if (Affix->compile)
+   {
+       if (regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB) != 0)
+       {
+           /* regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE); */
+           regfree(&(Affix->reg));
+           return (NULL);
+       }
+       Affix->compile = 0;
+   }
+
+   if (regexec(&(Affix->reg), stem, 1, subs, 0) != 0)
+       return NULL;
+
+   if (FindWord(Conf, stem, Affix->flag))
+       return pstrdup(stem);
+
+   return NULL;
+}
+
+#define NS 1
+/* capacity, in pointers, of the array of normal forms (last slot is the NULL terminator) */
+#define MAX_NORM 512
+
+/*
+ * Try to undo one prefix rule on "word" and append any normal forms found
+ * to the NULL-terminated list that starts at forms and whose write
+ * position is *cur.
+ *
+ * Returns the strncmp() result when the word does not start with the
+ * rule's repl text, so the caller can steer its search; otherwise 0.  On a
+ * mask match the de-prefixed word is added if it is in the dictionary with
+ * the rule's flag, and the first suffix rule recorded for key pi is then
+ * tried on it as well.
+ */
+static int
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur)
+{
+   regmatch_t  subs[NS * 2];
+   char        stem[2 * MAXNORMLEN] = "";
+   AFFIX      *affixes = Conf->Affix;
+   int         cmp;
+   int         first_suffix;
+   int         suffix_res;
+
+   cmp = strncmp(word, Affix->repl, Affix->replen);
+   if (cmp != 0)
+       return cmp;
+
+   /* swap the prefix for the text it replaced */
+   strcpy(stem, Affix->find);
+   strcat(stem, word + Affix->replen);
+
+   if (Affix->compile)
+   {
+       if (regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB) != 0)
+       {
+           /* regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE); */
+           regfree(&(Affix->reg));
+           return (0);
+       }
+       Affix->compile = 0;
+   }
+
+   if (regexec(&(Affix->reg), stem, 1, subs, 0) != 0)
+       return 0;
+
+   if (FindWord(Conf, stem, Affix->flag) != NULL &&
+       (*cur - forms) < (MAX_NORM - 1))
+   {
+       **cur = pstrdup(stem);
+       (*cur)++;
+       **cur = NULL;
+   }
+
+   first_suffix = Conf->SuffixTree.Left[pi];
+   if (first_suffix >= 0 && (*cur - forms) < (MAX_NORM - 1))
+   {
+       **cur = CheckSuffix(stem, strlen(stem), &affixes[first_suffix], &suffix_res, Conf);
+       if (**cur)
+       {
+           (*cur)++;
+           **cur = NULL;
+       }
+   }
+
+   return 0;
+}
+
+
+/*
+ * Return the normal (dictionary) forms of "word".
+ *
+ * The word is lower-cased in place.  The result is a palloc'ed,
+ * NULL-terminated array of palloc'ed strings holding at most MAX_NORM-1
+ * forms, or NULL when the word is empty, longer than MAXNORMLEN, or has no
+ * normal form.
+ */
+char **
+NormalizeWord(IspellDict * Conf, char *word)
+{
+   size_t      len;
+   char      **forms;
+   char      **cur;
+   AFFIX      *Affix;
+   int         ri, pi, ipi, lp, rp, cp, ls, rs;
+   int         lres, rres, cres = 0;
+
+   len = strlen(word);
+
+   /*
+    * An empty word has no last character: reading word[len - 1] would be
+    * out of bounds, and the affix loop below steps by that character's
+    * value.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return (NULL);
+
+   strlower(word);
+
+   forms = (char **) palloc(MAX_NORM * sizeof(char *));
+   cur = forms;
+   *cur = NULL;
+
+   ri = (int) (*word) & 255;           /* first byte: prefix table key */
+   pi = (int) (word[len - 1]) & 255;   /* last byte: suffix table key */
+   Affix = (AFFIX *) Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if (FindWord(Conf, word, 0))
+   {
+       *cur = pstrdup(word);
+       cur++;
+       *cur = NULL;
+   }
+
+   /* Find all other NORMAL forms of the 'word' */
+
+   /* two passes: suffix key 0 (rules with empty repl), then key pi */
+   for (ipi = 0; ipi <= pi; ipi += pi)
+   {
+
+       /* check prefix */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp)
+       {
+           cp = (lp + rp) >> 1;
+           cres = 0;
+           if ((cur - forms) < (MAX_NORM - 1))
+               cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+           if ((lp < cp) && ((cur - forms) < (MAX_NORM - 1)))
+               lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+           if ((rp > cp) && ((cur - forms) < (MAX_NORM - 1)))
+               rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+
+           /* only the middle probe steers the search */
+           if (cres < 0)
+           {
+               rp = cp - 1;
+               lp++;
+           }
+           else if (cres > 0)
+           {
+               lp = cp + 1;
+               rp--;
+           }
+           else
+           {
+               lp++;
+               rp--;
+           }
+       }
+
+       /* check suffix */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       while (ls >= 0 && ls <= rs)
+       {
+           if ((cur - forms) < (MAX_NORM - 1))
+           {
+               *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+               if (*cur)
+               {
+                   cur++;
+                   *cur = NULL;
+               }
+           }
+           if ((rs > ls) && ((cur - forms) < (MAX_NORM - 1)))
+           {
+               *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+               if (*cur)
+               {
+                   cur++;
+                   *cur = NULL;
+               }
+           }
+           ls++;
+           rs--;
+       }                       /* end while */
+
+   }                           /* for ipi */
+
+   if (cur == forms)
+   {
+       pfree(forms);
+       return (NULL);
+   }
+   return (forms);
+}
+
+/*
+ * Release everything owned by an IspellDict: compiled regular expressions,
+ * the strdup'ed words, both arrays, and finally zero the structure so it
+ * can be reused.
+ */
+void
+FreeIspell(IspellDict * Conf)
+{
+   int         i;
+   AFFIX      *Affix = (AFFIX *) Conf->Affix;
+
+   /* compile == 0 marks rules whose regex has actually been compiled */
+   for (i = 0; i < Conf->naffixes; i++)
+   {
+       if (Affix[i].compile == 0)
+           regfree(&(Affix[i].reg));
+   }
+
+   /* the word list has nspell entries, not naffixes */
+   for (i = 0; i < Conf->nspell; i++)
+       free(Conf->Spell[i].word);
+
+   free(Conf->Affix);
+   free(Conf->Spell);
+   memset((void *) Conf, 0, sizeof(IspellDict));
+   return;
+}


diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6


--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include <sys/types.h>
+#include <regex.h>
+/* One dictionary word together with its affix flag characters. */
+typedef struct spell_struct {
+        char * word; /* word text; AddSpell() stores a strdup()'ed copy */
+        char flag[10]; /* affix flag characters, filled in by AddSpell() */
+} SPELL;
+/* One affix rule, filled in by AddAffix(). */
+typedef struct aff_struct {   
+        char flag; /* flag character the rule belongs to */
+        char type; /* 's' for suffix rules, 'p' for prefix rules */
+        char mask[33]; /* regex source: mask with '$' appended or '^' prepended */
+        char find[16]; /* text substituted for repl when undoing the affix */
+        char repl[16]; /* affix text looked for at the word's start or end */
+        regex_t reg; /* compiled form of mask, valid once compile == 0 */
+        size_t replen; /* strlen(repl) */
+        char compile; /* 1 until the regex has been compiled */
+} AFFIX;
+/* Per-byte-value index ranges into a sorted array; Left is -1 when empty. */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+/* A loaded ispell dictionary: affix rules, word list and lookup tables. */
+typedef struct {
+   int maffixes; /* allocated size of Affix */
+   int naffixes; /* number of Affix entries in use */
+   AFFIX * Affix;
+
+   int nspell; /* number of Spell entries in use */
+   int mspell; /* allocated size of Spell */
+   SPELL   *Spell;
+   Tree_struct SpellTree; /* Spell ranges by first byte of the word */
+   Tree_struct PrefixTree; /* prefix rule ranges by first byte of repl */
+   Tree_struct SuffixTree; /* suffix rule ranges by last byte of repl */
+
+} IspellDict;
+/* lookup and loading */
+char ** NormalizeWord(IspellDict * Conf,char *word);
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+/* building, sorting and freeing the dictionary */
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif


diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b


--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+/* states of the parse_cfgdict() state machine */
+#define CS_WAITKEY 0
+#define CS_INKEY   1
+#define CS_WAITEQ  2
+#define CS_WAITVALUE   3
+#define CS_INVALUE 4
+#define CS_IN2VALUE    5
+#define CS_WAITDELIM   6
+#define CS_INESC   7
+#define CS_IN2ESC  8
+
+/*
+ * Return a palloc'ed, NUL-terminated copy of the first len bytes of ptr
+ * with backslash escapes removed: every backslash is dropped and the
+ * character after it is kept literally.  A backslash that is the very last
+ * character has nothing to escape and is kept as is.
+ */
+static char *
+nstrdup(char *ptr, int len)
+{
+   char       *res = palloc(len + 1),
+              *cptr;
+
+   memcpy(res, ptr, len);
+   res[len] = '\0';
+   cptr = ptr = res;
+   while (*ptr)
+   {
+       /*
+        * Skip the backslash only when a character follows it; otherwise
+        * the copy below would consume the terminator and the loop would
+        * run past the end of the buffer.
+        */
+       if (*ptr == '\\' && ptr[1] != '\0')
+           ptr++;
+       *cptr = *ptr;
+       ptr++;
+       cptr++;
+   }
+   *cptr = '\0';
+
+   return res;
+}
+
+/*
+ * Parse a configuration string of the form
+ *     key = value, key2 = "quoted value", ...
+ * into a palloc'ed array of Map entries stored in *m.
+ *
+ * Keys consist of alphabetic characters.  A value is either enclosed in
+ * double quotes or runs up to the next whitespace or comma; a backslash
+ * escapes the following character in both forms.  The array is sized from
+ * the number of commas in the input plus two and zero-filled, so the
+ * entries after the last parsed pair are all-zero.  Raises elog(ERROR) on
+ * syntax errors.
+ */
+void
+parse_cfgdict(text *in, Map **m)
+{
+   Map        *mptr;
+   char       *ptr = VARDATA(in),
+              *begin = NULL;
+   int         num = 0;        /* int: a char counter overflows on long input */
+   int         state = CS_WAITKEY;
+
+   /* upper bound on the number of pairs: one more than the commas */
+   while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
+   {
+       if (*ptr == ',')
+           num++;
+       ptr++;
+   }
+
+   *m = mptr = (Map *) palloc(sizeof(Map) * (num + 2));
+   memset(mptr, 0, sizeof(Map) * (num + 2));
+   ptr = VARDATA(in);
+   while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
+   {
+       /* the ctype functions require an unsigned char value */
+       unsigned char ch = (unsigned char) *ptr;
+
+       if (state == CS_WAITKEY)
+       {
+           if (isalpha(ch))
+           {
+               begin = ptr;
+               state = CS_INKEY;
+           }
+           else if (!isspace(ch))
+               elog(ERROR, "Syntax error in position %d near '%c'", (int) (ptr - VARDATA(in)), *ptr);
+       }
+       else if (state == CS_INKEY)
+       {
+           if (isspace(ch))
+           {
+               mptr->key = nstrdup(begin, ptr - begin);
+               state = CS_WAITEQ;
+           }
+           else if (*ptr == '=')
+           {
+               mptr->key = nstrdup(begin, ptr - begin);
+               state = CS_WAITVALUE;
+           }
+           else if (!isalpha(ch))
+               elog(ERROR, "Syntax error in position %d near '%c'", (int) (ptr - VARDATA(in)), *ptr);
+       }
+       else if (state == CS_WAITEQ)
+       {
+           if (*ptr == '=')
+               state = CS_WAITVALUE;
+           else if (!isspace(ch))
+               elog(ERROR, "Syntax error in position %d near '%c'", (int) (ptr - VARDATA(in)), *ptr);
+       }
+       else if (state == CS_WAITVALUE)
+       {
+           if (*ptr == '"')
+           {
+               begin = ptr + 1;
+               state = CS_INVALUE;
+           }
+           else if (!isspace(ch))
+           {
+               begin = ptr;
+               state = CS_IN2VALUE;
+           }
+       }
+       else if (state == CS_INVALUE)
+       {
+           /* inside a quoted value */
+           if (*ptr == '"')
+           {
+               mptr->value = nstrdup(begin, ptr - begin);
+               mptr++;
+               state = CS_WAITDELIM;
+           }
+           else if (*ptr == '\\')
+               state = CS_INESC;
+       }
+       else if (state == CS_IN2VALUE)
+       {
+           /* inside an unquoted value */
+           if (isspace(ch) || *ptr == ',')
+           {
+               mptr->value = nstrdup(begin, ptr - begin);
+               mptr++;
+               state = (*ptr == ',') ? CS_WAITKEY : CS_WAITDELIM;
+           }
+           else if (*ptr == '\\')
+               state = CS_IN2ESC;  /* must return to CS_IN2VALUE, not CS_INVALUE */
+       }
+       else if (state == CS_WAITDELIM)
+       {
+           if (*ptr == ',')
+               state = CS_WAITKEY;
+           else if (!isspace(ch))
+               elog(ERROR, "Syntax error in position %d near '%c'", (int) (ptr - VARDATA(in)), *ptr);
+       }
+       else if (state == CS_INESC)
+       {
+           /* escaped character inside a quoted value */
+           state = CS_INVALUE;
+       }
+       else if (state == CS_IN2ESC)
+       {
+           /* escaped character inside an unquoted value */
+           state = CS_IN2VALUE;
+       }
+       else
+           elog(ERROR, "Bad parser state: %d at position %d near '%c'", state, (int) (ptr - VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may be ended by the end of the input */
+   if (state == CS_IN2VALUE)
+   {
+       mptr->value = nstrdup(begin, ptr - begin);
+       mptr++;
+   }
+   else if (!(state == CS_WAITDELIM || state == CS_WAITKEY))
+       elog(ERROR, "Unexpected end of line");
+}
+
+


diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2


--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery is parsed with the text parser
+ * and morphology is applied as well.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored in a flat (plain) form: in the array of nodes
+ * the right child is always the next item and the left child is at item+item->left
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+/*
+ * Version-1 fmgr registrations and prototypes for the SQL-callable
+ * functions defined in this file.
+ */
+
+/* input function: parses a query string as-is (no morphology) */
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+/* output function: prints the query in infix form */
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+/* match test, arguments are (tsvector, query) */
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+/* same match test with the arguments swapped */
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+/* debug helper: prints the query after clean_NOT_v2() */
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+/* text -> query with morphology, configuration given by id */
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+/* text -> query with morphology, configuration given by name */
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+/* text -> query with morphology, using the current configuration */
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+/*
+ * Token codes returned by gettoken_query(); VAL, OPR and VALTRUE are also
+ * stored as the type of a query node.
+ */
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR   2
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ *
+ * Nodes are created by pushquery() and chained through 'next' with the
+ * most recently pushed node at the head of the list.
+ */
+typedef struct NODE
+{
+   int2        weight;     /* weight bitmask as produced by get_weight() */
+   int2        type;       /* token type: VAL, OPR or VALTRUE */
+   int4        val;        /* operator character, or CRC32 of the operand */
+   int2        distance;   /* offset of the operand text in the operand buffer */
+   int2        length;     /* length of the operand text */
+   struct NODE *next;      /* previously pushed node */
+}  NODE;
+
+/*
+ * Working state of the query parser (see queryin()).
+ */
+typedef struct
+{
+   char       *buf;        /* current position in the input string */
+   int4        state;      /* WAITOPERAND or WAITOPERATOR */
+   int4        count;      /* number of currently open parentheses */
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   int4        lenop;      /* allocated size of op */
+   int4        sumlen;     /* bytes used in op, including terminating zeros */
+   char       *op;         /* buffer with all operands, each zero-terminated */
+   char       *curop;      /* write position inside op */
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional weight suffix of the form ":" followed by any number
+ * of the letters a, b, c, d (case-insensitive).
+ *
+ * *weight receives a bitmask: a -> bit 3, b -> bit 2, c -> bit 1, d -> bit 0;
+ * it is 0 when buf does not start with ':'.
+ *
+ * Returns a pointer to the first character that is not part of the suffix.
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+   *weight = 0;
+
+   if ( *buf != ':' )
+       return buf;
+
+   buf++;
+   while( *buf ) {
+       /*
+        * tolower() is only defined for values representable as unsigned
+        * char (or EOF), so a plain char with the high bit set must be cast.
+        */
+       switch(tolower((unsigned char) *buf)) {
+           case 'a': *weight |= 1<<3; break;
+           case 'b': *weight |= 1<<2; break;
+           case 'c': *weight |= 1<<1; break;
+           case 'd': *weight |= 1;    break;
+           default: return buf;
+       }
+       buf++;
+   }
+
+   return buf;
+}
+
+/*
+ * get token from query string
+ *
+ * Returns one of the token codes (VAL, OPR, OPEN, CLOSE, END, ERR) and
+ * advances state->buf past the token.  For OPR, *val holds the operator
+ * character.  For VAL, *strval / *lenval describe the operand text held in
+ * state->valstate and *weight the weight mask parsed by get_weight().
+ * Space characters between tokens are skipped.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               /* unary NOT: stay in WAITOPERAND, an operand must follow */
+               if (*(state->buf) == '!')
+               {
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   /* remember nesting depth so ')' can be balanced */
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* let the tsvector value parser read one operand */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       /* continue after the operand and its optional weight */
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   /* more ')' than '(' is a syntax error */
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* end of input is an error while parentheses are open */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       /* reached only for a skipped space: move to the next character */
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Prepend a new node to the parser's list, which holds the query in
+ * reverse polish notation.  Raises an error when the operand offset or
+ * the operand length exceeds its limit.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   /* the offset is validated before the length */
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+
+   node->weight = weight;
+   node->type = type;
+   node->val = val;
+   node->distance = distance;
+   node->length = lenval;
+
+   /* link in at the head of the list and count the node */
+   node->next = state->str;
+   state->str = node;
+   state->num++;
+}
+
+/*
+ * Push an operand exactly as written (used for tsquery parsing):
+ * adds a node carrying the CRC32 of the text and appends the
+ * zero-terminated text to the operand buffer.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   /* the node remembers where in the operand buffer its text will start */
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             state->curop - state->op, lenval, weight);
+
+   /* double the buffer until the text and its terminator fit */
+   for (;;)
+   {
+       int4        used = state->curop - state->op;
+
+       if (used + lenval + 1 < state->lenop)
+           break;
+
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+       state->curop = state->op + used;
+   }
+
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop[lenval] = '\0';
+   state->curop += lenval + 1;
+   state->sumlen += lenval + 1;
+}
+
+/*
+ * This function is used for morph parsing
+ *
+ * The operand text is run through parsetext_v2() with the parser state's
+ * configuration.  Every resulting word is pushed as a VAL operand, and
+ * each word after the first is followed by an '&' operator.  When no word
+ * is produced at all, a VALTRUE placeholder is pushed instead.
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT         prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   /* loop header restored: iterate over all words produced by the parser */
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       /* from the second word on, AND it with what was pushed before */
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 )
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+/* capacity of the pending-operator stack used by each makepol() call */
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens with gettoken_query() and emits nodes through pushval (for
+ * operands) and pushquery() (for operators).  Operators that cannot be
+ * emitted yet are kept on a local stack.  A '(' token is handled by a
+ * recursive call that returns at the matching ')'.
+ *
+ * Returns END on success; syntax errors are raised with elog(ERROR).
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* an operand completes every pending '&' and '!' on top of the stack */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               /* '|' is emitted immediately when something is already stacked */
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* parse the parenthesized sub-expression recursively */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               /* note: only one pending '&' / '!' is popped here */
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* end of this nesting level: flush all pending operators */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush whatever operators are still pending */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/*
+ * Context for checking query operands against one tsvector.
+ */
+typedef struct
+{
+   WordEntry  *arrb;       /* first entry of the tsvector */
+   WordEntry  *arre;       /* one past the last entry */
+   char       *values;     /* string area of the tsvector */
+   char       *operand;    /* operand area of the query */
+}  CHKVAL;
+
+/*
+ * Order a tsvector entry against a query operand: lengths are compared
+ * first, and only entries of equal length are compared byte-wise.
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(chkval->values + ptr->pos,
+                  chkval->operand + item->distance,
+                  item->length);
+}
+
+/*
+ * check weight info
+ *
+ * Returns true when at least one stored position of the entry 'val' has a
+ * weight selected by the bitmask item->weight.  The position data is read
+ * from the string area right after the short-aligned word text: a uint16
+ * count followed by that many WordEntryPos elements.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       /* expression restored: test the mask bit of this position's weight */
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false;
+}
+
+/*
+ * Binary-search the tsvector entries described by checkval (a CHKVAL)
+ * for the operand 'val'.  When found and both the operand has a weight
+ * mask and the entry has positions, the weights must match as well.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *chk = (CHKVAL *) checkval;
+   WordEntry  *lo = chk->arrb;
+   WordEntry  *hi = chk->arre;
+
+   /* the candidate, if present, is always inside [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = ValCompare(chk, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+       {
+           if ( val->weight && mid->haspos )
+               return checkclass_str(chk, mid, val);
+           return true;
+       }
+   }
+
+   return (false);
+}
+
+/*
+ * Evaluate the boolean query tree rooted at curitem.
+ *
+ * Operands are tested with chkcond(checkval, item).  For an operator the
+ * right operand is the next item and the left operand is at
+ * curitem + curitem->left.  When calcnot is false, a '!' node is not
+ * evaluated and counts as true.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       if (TS_execute(curitem + 1, checkval, calcnot, chkcond))
+           return false;
+       return true;
+   }
+
+   if (curitem->val == (int4) '&')
+   {
+       /* the right operand is only evaluated when the left one holds */
+       if (!TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+           return false;
+       return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+   }
+
+   /* |-operator: the right operand is only evaluated when the left one fails */
+   if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+       return true;
+   return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+}
+
+/*
+ * boolean operations
+ *
+ * rexectsq is exectsq with its two arguments in the opposite order.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(1);
+   Datum       second = PG_GETARG_DATUM(0);
+
+   return DirectFunctionCall2(exectsq, first, second);
+}
+
+/*
+ * Test whether the tsvector (argument 0) satisfies the query (argument 1).
+ * An empty tsvector or an empty query never matches.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   bool        result = false;
+
+   if (val->size && query->size)
+   {
+       CHKVAL      chkval;
+
+       chkval.arrb = ARRPTR(val);
+       chkval.arre = chkval.arrb + val->size;
+       chkval.values = STRPTR(val);
+       chkval.operand = GETOPERAND(query);
+
+       /* NOT operators are evaluated for real here (calcnot = true) */
+       result = TS_execute(GETQUERY(query), &chkval, true, checkcondition_str);
+   }
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * find left operand in polish notation view
+ *
+ * Walks the sub-tree that starts at ptr[*pos], filling in the 'left'
+ * field of every item, and leaves *pos just past that sub-tree.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+   if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
+   {
+       /* operand: a leaf, nothing to link */
+       ptr[*pos].left = 0;
+       (*pos)++;
+   }
+   else if (ptr[*pos].val == (int4) '!')
+   {
+       /* unary operator: its only operand is the next item */
+       ptr[*pos].left = 1;
+       (*pos)++;
+       findoprnd(ptr, pos);
+   }
+   else
+   {
+       ITEM       *curitem = &ptr[*pos];
+       int4        tmp = *pos;
+
+       (*pos)++;
+       /* skip the right operand, which starts at the next item */
+       findoprnd(ptr, pos);
+       /* the left operand begins where the right sub-tree ended */
+       curitem->left = *pos - tmp;
+       findoprnd(ptr, pos);
+   }
+}
+
+
+/*
+ * input
+ *
+ * Parse the query string 'buf' into a newly allocated QUERYTYPE.
+ * 'pushval' is called for every operand found (pushval_asis or
+ * pushval_morph in this file); 'cfg_id' is stored in the parser state
+ * and read by pushval_morph.  Raises an error on an empty query.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval);
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   /* the list head is the last node pushed, so it becomes item 0 */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str);
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * Input function: operands are taken as written, without morphology,
+ * and the configuration id passed to the parser is 0.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   char       *input = (char *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *parsed = queryin(input, pushval_asis, 0);
+
+   PG_RETURN_POINTER(parsed);
+}
+
+/*
+ * out function
+ *
+ * State of the infix printer: a growable output buffer plus the current
+ * position in the item array.
+ */
+typedef struct
+{
+   ITEM       *curpol;     /* item to be printed next */
+   char       *buf;        /* start of the output buffer */
+   char       *cur;        /* write position, kept at the terminating zero */
+   char       *op;         /* operand area of the query */
+   int4        buflen;     /* allocated size of buf */
+}  INFIX;
+
+/*
+ * Make sure the output buffer of the INFIX pointed to by 'inf' has room
+ * for 'addsize' more bytes plus a terminating zero, doubling the buffer
+ * as often as needed.  inf->cur is re-based after each repalloc.
+ */
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the sub-tree starting at in->curpol into in->buf and advances
+ * in->curpol past it.  'first' suppresses the parentheses that are
+ * otherwise put around an '|' expression.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       /* worst case: every char escaped, two quotes, ':' and four weight letters */
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           /* only the single quote is escaped */
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       /* a negated operator expression is wrapped in parentheses */
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm;
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       /*
+        * The right operand comes first in the item array but must be
+        * printed last, so it is rendered into a temporary buffer.
+        */
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol;
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function: returns the query printed in infix form as a palloc'd
+ * C string; an empty query yields an empty string.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+
+   if (query->size == 0)
+   {
+       char       *b = palloc(1);
+
+       *b = '\0';
+       /* release a detoasted copy on this path too */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(b);
+   }
+   nrm.curpol = GETQUERY(query);
+   nrm.buflen = 32;
+   nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+   *(nrm.cur) = '\0';
+   nrm.op = GETOPERAND(query);
+   infix(&nrm, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(nrm.buf);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * Returns a text value: empty for an empty query, "T" when clean_NOT_v2()
+ * returns no items, otherwise the cleaned query printed in infix form.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+   text       *res;
+   ITEM       *q;
+   int4        len;
+
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       /* release a detoasted copy on this path too */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(res);
+   }
+
+   q = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (!q)
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+   else
+   {
+       int4        outlen;
+
+       nrm.curpol = q;
+       nrm.buflen = 32;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+       *(nrm.cur) = '\0';
+       nrm.op = GETOPERAND(query);
+       infix(&nrm, true);
+
+       /* copy the printed text without its terminating zero */
+       outlen = nrm.cur - nrm.buf;
+       res = (text *) palloc(outlen + VARHDRSZ);
+       VARATT_SIZEP(res) = outlen + VARHDRSZ;
+       memcpy(VARDATA(res), nrm.buf, outlen);
+
+       /* the print buffer is no longer needed once copied */
+       pfree(nrm.buf);
+       pfree(q);
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * Build a query from text (argument 1) with morphology, using the
+ * configuration id in argument 0.  The parsed items are then passed
+ * through clean_fakeval_v2(); when it returns nothing the result is
+ * turned into an empty query.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text       *in = PG_GETARG_TEXT_P(1);
+   char       *str = text2char(in);
+   QUERYTYPE  *query;
+   ITEM       *cleaned;
+   int4        len;
+
+   PG_FREE_IF_COPY(in,1);
+
+   query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+   cleaned = clean_fakeval_v2(GETQUERY(query), &len);
+
+   if (cleaned)
+   {
+       /* overwrite the leading items with the cleaned ones */
+       memcpy((void *) GETQUERY(query), (void *) cleaned, len * sizeof(ITEM));
+       pfree(cleaned);
+   }
+   else
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+   }
+
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * Build a query from text (argument 1) using the configuration whose name
+ * is given in argument 0; the name is resolved with name2id_cfg() and the
+ * work is delegated to to_tsquery().
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+
+   /*
+    * 'name' was fetched from argument 0, so it must be compared against
+    * argument 0; comparing against argument 1 would always pfree it, even
+    * when it is not a detoasted copy.
+    */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * Build a query from text (argument 0) using the configuration returned
+ * by get_currcfg(); delegates to to_tsquery().
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum   cfg = Int32GetDatum( get_currcfg() );
+   Datum   res = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));
+
+   PG_RETURN_DATUM(res);
+}
+
+


diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2


--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * item in polish notation with back link
+ * to left operand
+ */
+typedef struct ITEM
+{
+   int8        type;       /* token type, e.g. VAL or OPR */
+   int8        weight;     /* weight bitmask of an operand */
+   int2        left;       /* offset in items to the left operand (see TS_execute) */
+   int4        val;        /* operator character for operator items */
+   /* user-friendly value, must correlate with WordEntry */
+   uint32  
+       unused:1,
+       length:11,          /* length of the operand text */
+       distance:20;        /* offset of the operand text in the operand area */
+}  ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ */
+typedef struct
+{
+   int4        len;        /* first header word ("len" in the layout above) */
+   int4        size;       /* number of ITEMs that follow the header */
+   char        data[1];    /* start of the ITEM array; operands come after it */
+}  QUERYTYPE;
+
+/* size of the QUERYTYPE header: the len and size fields */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+/* total size of a query with 'size' items and 'lenofoperand' operand bytes */
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+/* pointer to the ITEM array; fully parenthesized so it can be indexed directly */
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+/* pointer to the operand area, which follows the ITEM array */
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+/* true for the characters that have a special meaning in a query */
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+/* token / item type codes */
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+/*
+ * Evaluate the query tree rooted at curitem; chkcond is called with
+ * checkval for every operand item.
+ */
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif


diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400


--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevance ranking
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+/*
+ * Version-1 fmgr registrations and prototypes for the SQL-callable
+ * functions of this file.
+ */
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+/* default weights, indexed by the weight value stored in a WordEntryPos */
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+/* weight of one position; relies on a variable named 'w' in the calling scope */
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+/* normalization method used when the caller gives none (0 = none, see calc_rank) */
+#define DEF_NORM_METHOD    0
+
+/*
+ * Weight of a word collocation as a function of the distance w between
+ * the two words; distances above 100 get a negligible constant weight.
+ */
+static float4 word_distance ( int4 w ) {
+   double  scaled;
+
+   if ( w>100 )
+       return 1e-30;
+
+   scaled = ((float4)w)/1.5-2;
+   return 1.0/(1.005+0.05*exp( scaled ));
+}
+
+/*
+ * Length of a tsvector for normalization: the sum of the position counts
+ * of all entries, where an entry without positions counts as one.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry   *entry;
+   WordEntry   *last = (WordEntry*)STRPTR(t);
+   int total = 0;
+
+   for (entry = ARRPTR(t); entry < last; entry++) {
+       int npos = POSDATALEN(t, entry);
+
+       total += ( npos == 0 ) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Order a tsvector entry (text in eval) against a query item (text in
+ * qval): lengths are compared first, equal lengths are compared byte-wise.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary-search the entries of tsvector t for the query item; returns the
+ * matching entry or NULL when there is none.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+   WordEntry  *lo = ARRPTR(t);
+   WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+   /* the candidate, if present, is always inside [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+       if (cmp == 0)
+           return mid;
+       if (cmp < 0)
+           lo = mid + 1;
+       else
+           hi = mid;
+   }
+
+   return NULL;
+}
+
+/*
+ * Substitute position data for entries that have no positions.  The
+ * ranking functions overwrite the first element with a uint16 count
+ * (lengthof(POSNULL)-1) and treat the remaining element as the only
+ * position.
+ * NOTE(review): the initializer order depends on the WordEntryPos field
+ * layout, which is declared elsewhere -- confirm.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank for a query whose top-level operator is '&'.
+ *
+ * For every pair of distinct query operands found in the tsvector, every
+ * pair of their positions contributes sqrt(w1 * w2 * word_distance(dist)),
+ * and the contributions are combined as 1 - (1-res)*(1-cur).
+ * Returns -1.0 when no pair contributed.
+ *
+ * w is the weight table used by the wpos() macro.
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* pos[] holds uint16* elements, so size the memset by that type */
+   memset(pos,0,sizeof(uint16*) * q->size);
+   /* store the element count in front of the fake position data */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+
+       /* position data: a uint16 count followed by the positions */
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       /* pair this operand with every operand found before it */
+       for( k=0; k<i; k++ ) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   /* a zero distance only counts when one side has no real positions */
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw;
+                       if ( !dist ) dist=MAXENTRYPOS;
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res;
+}
+
+/*
+ * Rank for a query whose top-level operator is not '&'.
+ *
+ * Every position of every query operand found in the tsvector
+ * contributes its weight; contributions are combined as
+ * 1 - (1-res)*(1-weight).  Returns -1.0 when nothing was found.
+ *
+ * w is the weight table used by the wpos() macro.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* store the element count in front of the fake position data */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           /* no positions stored: use the single fake position */
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Compute the rank of tsvector t for query q with weight table w.
+ *
+ * Uses calc_rank_and() when the first query item is the '&' operator and
+ * calc_rank_or() otherwise; a negative result (nothing found) is replaced
+ * by 1e-20.  'method' selects the normalization:
+ *   0 - none
+ *   1 - divide by the logarithm of the document length
+ *   2 - divide by the document length
+ * Any other value raises an error.  Returns 0.0 for an empty tsvector or
+ * an empty query.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   if ( res < 0 )
+       res = 1e-20;
+
+   switch(method) {
+       case 0: break;
+       case 1: {
+           int len = cnt_length(t);
+
+           /* log(1) is 0: leave a length-1 document unnormalized rather than divide by zero */
+           if ( len > 1 )
+               res /= log((float)len);
+           break;
+       }
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   return res;
+}
+
+/*
+ * rank(weights float4[], tsvector, query [, method int4])
+ *
+ * Ranks the tsvector against the query with caller-supplied weights.  The
+ * array must be one-dimensional with at least lengthof(weights) elements;
+ * a negative element selects the built-in default for that slot, and a
+ * weight above 1.0 is rejected.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 )
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+       elog(ERROR,"Array of weight is too short");
+
+   /* loop header restored: fill every slot of the weight table */
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 )
+           elog(ERROR,"Weight out of range");
+   }
+
+   /* the normalization method is an optional fourth argument */
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method);
+
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank(tsvector, query [, method int4])
+ *
+ * Ranks the tsvector against the query using the built-in default weights.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   /* the normalization method is an optional third argument */
+   int method = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   float res = calc_rank(weights, txt, query, method);
+
+   PG_FREE_IF_COPY(txt, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * One occurrence of a query item in a document.
+ */
+typedef struct {
+   ITEM    *item;  /* query item that occurs here */
+   int32   pos;    /* position of the occurrence */
+} DocRepresentation;
+
+/*
+ * Comparison function ordering DocRepresentation elements by position.
+ * Equal positions compare as 0, so the result is antisymmetric as a sort
+ * comparator must be.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   const DocRepresentation *da = (const DocRepresentation *) a;
+   const DocRepresentation *db = (const DocRepresentation *) b;
+
+   if ( da->pos == db->pos )
+       return 0;
+   return ( da->pos > db->pos ) ? 1 : -1;
+}
+
+
+/*
+ * A slice of a document representation, used as the checkval for
+ * TS_execute().
+ */
+typedef struct {
+   DocRepresentation *doc;
+   int len;
+}  ChkDocR;
+
+/*
+ * TS_execute() callback: true when the query item 'val' occurs in the
+ * document slice described by checkval (a ChkDocR).
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *chk = (ChkDocR*)checkval;
+   int i;
+
+   for (i = 0; i < chk->len; i++) {
+       if ( chk->doc[i].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next cover: a stretch of the document, searched from index
+ * *pos on, that satisfies the query.
+ *
+ * doc/len - document representation; the scans below assume it is ordered
+ *           by position
+ * pos     - in: index in doc to start from; out: index to continue from
+ * p, q    - out: first and last position of the cover; the incoming *q is
+ *           used to detect an end position that was already examined
+ *
+ * Returns true when a cover was found.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* forward pass: *q becomes the largest "first occurrence" among the operands */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               }
+               break;
+           }
+           ptr++;
+       }
+
+       item++;
+   }
+
+   /* no operand occurs at or after *pos */
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   }
+
+   /* backward pass: *p becomes the smallest "last occurrence" before lastpos */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+
+   if ( *p<=*q ) {
+       /* evaluate the query on the slice from f to doc[lastpos] only */
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q);
+   }
+
+   return false;
+}
+
+/*
+ * get_docrep: build the list of occurrences of the query's operands in
+ * the document txt.
+ *
+ * For every VAL item of the query that find_wordentry() locates in txt,
+ * one DocRepresentation is emitted per stored position; entries without
+ * position data contribute the positions held in POSNULL instead.  The
+ * result is sorted by position.
+ *
+ * Returns a palloc'd array with *doclen entries, or NULL (with
+ * *doclen = 0) when no operand occurs in the document.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   /* first uint16 of POSNULL holds the number of positions following it */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until all dimt positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+
+   if ( cur>0 ) {
+       if ( cur>1 )
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd: cover-based ranking.
+ *
+ * Arguments: K (int4), the document (tsvector), the query, and an
+ * optional normalization method.  Every cover reported by Cover()
+ * contributes 1.0 when it spans at most K positions and K/width
+ * otherwise.  K <= 0 selects the default of 4.
+ *
+ * Normalization: 0 - none, 1 - divide by log(document length),
+ * 2 - divide by document length.  A divisor of zero (e.g. log(1)) is
+ * skipped so the result never becomes inf or NaN.  Any other method
+ * raises an error.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   double  lognorm;
+   float   lennorm;
+   int p=0,q=0,len,cur;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       /* no query operand occurs in the document */
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;
+   while( Cover(doc, len, query, &cur, &p, &q) )
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   switch(method) {
+       case 0:
+           break;
+       case 1:
+           /* log(1) is 0: leave res untouched instead of dividing by zero */
+           lognorm = log((float)cnt_length(txt));
+           if ( lognorm > 0.0 )
+               res /= lognorm;
+           break;
+       case 2:
+           lennorm = (float)cnt_length(txt);
+           if ( lennorm > 0.0 )
+               res /= lennorm;
+           break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd_def: rank_cd() without an explicit K.
+ *
+ * Forwards the document (argument 0) and query (argument 1) to rank_cd()
+ * with K = -1, which rank_cd() replaces by its built-in default.  The
+ * optional third argument is passed through as the normalization method;
+ * DEF_NORM_METHOD is used when it is absent.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum   norm;
+
+   if ( PG_NARGS() == 3 )
+       norm = PG_GETARG_DATUM(2);
+   else
+       norm = Int32GetDatum(DEF_NORM_METHOD);
+
+   PG_RETURN_DATUM( DirectFunctionCall4(rank_cd,
+                                        Int32GetDatum(-1),
+                                        PG_GETARG_DATUM(0),
+                                        PG_GETARG_DATUM(1),
+                                        norm) );
+}
+
+/**************debug*************/
+
+/*
+ * One word occurrence of the document, used by get_covers() to print the
+ * text with cover markers.
+ */
+typedef struct {
+   char    *w;     /* lexeme text (not NUL-terminated; see len) */
+   int2    len;    /* length of the lexeme in bytes */
+   int2    pos;    /* word position in the document */
+   int2    start;  /* number of the cover starting at this word, 0 if none */
+   int2    finish; /* number of the cover ending at this word, 0 if none */
+} DocWord;
+
+/*
+ * qsort() comparator ordering DocWord entries by ascending pos.
+ *
+ * Equal positions compare as equal (0); a comparator that reports
+ * "greater" for equal keys in both directions is inconsistent and is
+ * not permitted by qsort().
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   const DocWord *wa = (const DocWord *) a;
+   const DocWord *wb = (const DocWord *) b;
+
+   if ( wa->pos == wb->pos )
+       return 0;
+   return ( wa->pos > wb->pos ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers (debug aid): return the document's words in position order
+ * as text, with every cover found by Cover() marked as "{N " before its
+ * first word and "}N " after its last word, N being the cover's number.
+ *
+ * Arguments: the document (tsvector) and the query.  Returns an empty
+ * text when no query operand occurs in the document.  Raises an error
+ * when some entry of the tsvector carries no position data.
+ */
+Datum
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   /* Cover() reads *q on entry, so p and q must start out defined */
+   int pos=0,p=0,q=0,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count the word occurrences of the whole document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;
+           dw[cur].len=pptr[i].len;
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* tag the first and last word of every cover with its number */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* check the bound before looking at the entry */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++;
+   }
+
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) {
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/*
+ * Node of the pointer-based ("normal") view of a query tree.
+ */
+typedef struct NODE
+{
+   struct NODE *left;      /* left operand; NULL for operands and for ! */
+   struct NODE *right;     /* right operand; NULL for operands */
+   ITEM       *valnode;    /* the query item this node stands for */
+}  NODE;
+
+/*
+ * make query tree from plain view of query
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *result = (NODE *) palloc(sizeof(NODE));
+
+   result->valnode = in;
+   result->left = NULL;
+   result->right = NULL;
+
+   /* anything that is not an operator is a leaf */
+   if (in->type != OPR)
+       return result;
+
+   /* the right operand directly follows its operator */
+   result->right = maketree(in + 1);
+
+   /* ! is unary; for the other operators in->left is the offset of the left operand */
+   if (in->val != (int4) '!')
+       result->left = maketree(in + in->left);
+
+   return result;
+}
+
+/*
+ * Growing output buffer used while flattening a NODE tree back into an
+ * ITEM array.
+ */
+typedef struct
+{
+   ITEM       *ptr;        /* the array being filled */
+   int4        len;        /* allocated number of items */
+   int4        cur;        /* number of items written so far */
+}  PLAINTREE;
+
+/*
+ * Append node and, recursively, its operands to the array in state,
+ * right operand first, fixing up the operator's left offset.  The node
+ * itself is released once it has been written out.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   int4        self;
+
+   /* make room for one more item */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+
+   self = state->cur;
+   memcpy((void *) &(state->ptr[self]), (void *) node->valnode, sizeof(ITEM));
+   state->cur++;
+
+   if (node->valnode->type != VAL)
+   {
+       if (node->valnode->val == (int4) '!')
+       {
+           /* the single operand of ! always follows immediately */
+           state->ptr[self].left = 1;
+           plainnode(state, node->right);
+       }
+       else
+       {
+           plainnode(state, node->right);
+           /* the left operand starts right after the whole right subtree */
+           state->ptr[self].left = state->cur - self;
+           plainnode(state, node->left);
+       }
+   }
+
+   pfree(node);
+}
+
+/*
+ * make plain view of tree from 'normal' view of tree
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   state;
+
+   state.ptr = NULL;
+   state.cur = 0;
+   state.len = 16;
+
+   /* only a tree rooted at an operand or an operator is flattened */
+   if (root != NULL &&
+       (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       state.ptr = (ITEM *) palloc(state.len * sizeof(ITEM));
+       plainnode(&state, root);
+   }
+
+   /* number of items written; 0 together with a NULL result */
+   *len = state.cur;
+   return state.ptr;
+}
+
+/*
+ * Release node and everything below it.  A NULL node is accepted.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive calls cope with missing children themselves */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Clean the tree of ! operators.
+ * This is useful for debugging, but otherwise
+ * the resulting view is used for searching in the index.
+ * Operator ! always returns TRUE
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   NODE       *keep;
+
+   /* operands stay as they are */
+   if (node->valnode->type == VAL)
+       return node;
+
+   /* a ! subtree is dropped entirely */
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   if (node->valnode->val != (int4) '|')
+   {
+       /* operator &: whichever side survives stands in for the node */
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left != NULL && node->right != NULL)
+           return node;
+
+       keep = (node->left != NULL) ? node->left : node->right;
+       pfree(node);
+       return keep;
+   }
+
+   /* operator |: losing either side loses the whole node */
+   node->left = clean_NOT_intree(node->left);
+   if (node->left != NULL)
+   {
+       node->right = clean_NOT_intree(node->right);
+       if (node->right != NULL)
+           return node;
+   }
+   freetree(node);
+   return NULL;
+}
+
+/*
+ * Return a newly built plain view of the query at ptr with all !
+ * subtrees removed; *len receives the number of items in the result.
+ * The result is NULL (and *len 0) when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree;
+
+   tree = maketree(ptr);
+   tree = clean_NOT_intree(tree);
+   return plaintree(tree, len);
+}
+
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Clean the query tree of values which are always in
+ * the text (stopwords)
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+   char        absorbing,
+               neutral;
+   NODE       *keep;
+
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->type == VALTRUE)
+   {
+       /* a fake value is always true */
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (node->right)
+           return node;
+
+       /* the operand collapsed to a constant: negate it */
+       *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+       freetree(node);
+       return NULL;
+   }
+
+   /*
+    * Binary operators.  For | a TRUE side decides the outcome and a
+    * FALSE side can be dropped; for & it is the other way round.
+    */
+   if (node->valnode->val == (int4) '|')
+   {
+       absorbing = V_TRUE;
+       neutral = V_FALSE;
+   }
+   else
+   {
+       absorbing = V_FALSE;
+       neutral = V_TRUE;
+   }
+
+   node->left = clean_fakeval_intree(node->left, &lresult);
+   node->right = clean_fakeval_intree(node->right, &rresult);
+
+   if (lresult == absorbing || rresult == absorbing)
+   {
+       freetree(node);
+       *result = absorbing;
+       return NULL;
+   }
+
+   if (lresult == neutral && rresult == neutral)
+   {
+       freetree(node);
+       *result = neutral;
+       return NULL;
+   }
+
+   if (lresult == neutral)
+   {
+       /* only the right side carries information */
+       keep = node->right;
+       pfree(node);
+       return keep;
+   }
+
+   if (rresult == neutral)
+   {
+       /* only the left side carries information */
+       keep = node->left;
+       pfree(node);
+       return keep;
+   }
+
+   return node;
+}
+
+/*
+ * Return a newly built plain view of the query at ptr with all fake
+ * (VALTRUE) values folded away; *len receives the number of items.
+ * When the whole query folds to a constant a NOTICE is emitted and
+ * NULL is returned with *len set to 0.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &verdict);
+
+   if (verdict == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/*
+ * Both functions take a query in plain (array) form and return a newly
+ * built plain array, storing its item count in *len.
+ * NOTE(review): ITEM and int4 are not declared by this header; it relies
+ * on the includer pulling in their definitions first.
+ */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/*
+ * Comparator for qsort()/bsearch(): orders SNMapEntry items by key
+ * using strcmp().
+ */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *lhs = (const SNMapEntry *) a;
+   const SNMapEntry *rhs = (const SNMapEntry *) b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add the pair (key, value) to map.  The key is copied with strdup();
+ * the list is grown with realloc() when full (16 entries at first, then
+ * doubling) and re-sorted after every insertion so findSNMap() can use
+ * bsearch().  Raises an error when memory cannot be obtained.
+ */
+void
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if ( map->len >= map->reallen ) {
+       int         newcap = 16;
+       SNMapEntry *grown;
+
+       if ( map->reallen )
+           newcap = 2*map->reallen;
+       grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newcap);
+       if ( !grown )
+           elog(ERROR, "No memory");
+       map->reallen = newcap;
+       map->list = grown;
+   }
+
+   slot = &map->list[ map->len ];
+   slot->key = strdup(key);
+   if ( ! slot->key )
+       elog(ERROR, "No memory");
+   slot->value = value;
+   map->len++;
+
+   /* keep the list ordered by key */
+   if ( map->len > 1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Same as addSNMap(), but the key is given as a text value; it is
+ * converted with text2char() and the temporary string is released
+ * afterwards.
+ */
+void
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey;
+
+   ckey = text2char( key );
+   addSNMap(map, ckey, value);
+   pfree(ckey);
+}
+
+/*
+ * Look key up in map with bsearch().  Returns the stored value, or 0
+ * when the map is empty or holds no such key.
+ */
+Oid
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry  probe;
+   SNMapEntry  *hit;
+
+   if ( !map->list || map->len==0 )
+       return 0;
+
+   probe.key = key;
+   probe.value = 0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( !hit )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Same as findSNMap(), but the key is given as a text value; it is
+ * converted with text2char() and the temporary string is released
+ * before returning.  Returns the stored value, or 0 when not found.
+ */
+Oid
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* same type as findSNMap()'s result and our return type */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release every key and the entry list of map, then reset the whole
+ * structure to zeroes so it can be reused.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       int i;
+
+       for (i = 0; i < map->len; i++) {
+           if ( map->list[i].key )
+               free(map->list[i].key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One key/value pair of an SNMap. */
+typedef struct {
+   char    *key;   /* NUL-terminated key string */
+   Oid value;      /* value associated with key */
+} SNMapEntry;
+
+/* Map from strings to Oids, kept as an array of entries. */
+typedef struct {
+   int len;            /* number of entries in use */
+   int reallen;        /* number of entries allocated */
+   SNMapEntry  *list;  /* the entries; NULL while nothing is allocated */
+} SNMap;
+
+/* add a pair; the _t variant takes the key as a text value */
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+/* look a key up; the _t variant takes the key as a text value */
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+/* release everything held by the map */
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a stemmer environment with S_size string slots, I_size
+ * integer slots and B_size boolean slots (any of which may be 0).
+ *
+ * Returns the new environment, or NULL when one of the calloc() calls
+ * fails; in that case everything allocated so far is released again.
+ * NOTE(review): failures inside create_s() are not detected here --
+ * confirm how create_s() reports them.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    /* a size field is only set once its array exists, so the sizes tell
+       us what has to be released; B is allocated last and so is never
+       present here */
+    if (z->S_size)
+    {   int i;
+        for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
+        free(z->S);
+    }
+    if (z->I_size) free(z->I);
+    lose_s(z->p);
+    free(z);
+    return NULL;
+}
+
+/*
+ * Release an environment obtained from SN_create_env(): the string
+ * slots, the integer and boolean arrays, the current string and the
+ * environment itself.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    int k;
+
+    if (z->S_size != 0)
+    {
+        for (k = 0; k < z->S_size; k++)
+            lose_s(z->S[k]);
+        free(z->S);
+    }
+    if (z->I_size != 0)
+        free(z->I);
+    if (z->B_size != 0)
+        free(z->B);
+    if (z->p != NULL)
+        lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Make the size symbols at s the current string of z: the range from 0
+ * to z->l is replaced through replace_s() and the cursor z->c is reset
+ * to 0.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/* State of one stemmer instance. */
+struct SN_env {
+    symbol * p;     /* the current string (created with create_s) */
+    /* c: cursor, l: limit, bra/ket: slice marks -- NOTE(review): the
+       meaning of a and lb is not visible here; confirm against the
+       Snowball runtime */
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size; /* element counts of S, I and B */
+    symbol * * S;   /* string slots */
+    int * I;        /* integer slots */
+    symbol * B;     /* boolean slots */
+};
+
+/* create and destroy an environment with the given slot counts */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+/* install the size symbols at s as the current string of z */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/*
+ * r_prelude (generated code): clears the Y_found flag z->B[0], then
+ * replaces selected 'y' symbols by 'Y' through slice_from_s(), setting
+ * Y_found each time it does.  The first block tests eq_s() for 'y' at
+ * the cursor and then in_grouping() against g_v; the second block
+ * repeatedly scans forward for an in_grouping(g_v) match followed by an
+ * eq_s() match for 'y'.  The cursor z->c is restored at the end of each
+ * block.  Always returns 1.
+ * NOTE(review): g_v is presumably the vowel set -- confirm against the
+ * Snowball source of the stemmer.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1:
+        z->c = c;
+    }
+    return 1;
+}
+
+static int r_mark_regions(struct SN_env * z) { /* Computes the marks p1
+     * (z->I[0]) and p2 (z->I[1]).  Both start out as z->l.  p1 is taken at
+     * the cursor reached either right after find_among(a_0) succeeds or,
+     * failing that, after scanning forward until in_grouping(g_v) succeeds
+     * and then until out_grouping(g_v) succeeds.  p2 is taken after one more
+     * such in_grouping/out_grouping scan.  Any scan that reaches z->l jumps
+     * to lab0, leaving the marks not yet assigned at z->l.  The cursor is
+     * restored before returning and the result is always 1.
+     */
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c; /* first alternative failed: rewind and try the scan */
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c; /* restore the cursor saved at the top of the do-block */
+    }
+    return 1;
+}
+
+/*
+ * shortv: backward test with two alternatives (Snowball "or", line 46).
+ * First alternative: out_grouping_b(g_v_WXY), in_grouping_b(g_v) and
+ * out_grouping_b(g_v) all succeed in that order.  Second alternative, tried
+ * from the original cursor: out_grouping_b(g_v), in_grouping_b(g_v), and the
+ * cursor then sits at the backward limit z->lb.  Returns 1 if either
+ * alternative holds, 0 otherwise.
+ */
+static int r_shortv(struct SN_env * z) {
+    int from_end = z->l - z->c; /* cursor remembered as a distance from z->l */
+
+    /* && short-circuits, so the three tests run in order and stop at the first failure */
+    if (out_grouping_b(z, g_v_WXY, 89, 121) &&
+        in_grouping_b(z, g_v, 97, 121) &&
+        out_grouping_b(z, g_v, 97, 121))
+        return 1;
+
+    z->c = z->l - from_end; /* rewind before trying the second alternative */
+    if (!out_grouping_b(z, g_v, 97, 121))
+        return 0;
+    if (!in_grouping_b(z, g_v, 97, 121))
+        return 0;
+    return z->c <= z->lb; /* atlimit, line 47 */
+}
+
+/* R1: returns 1 when the cursor is at or beyond mark p1 (z->I[0]), else 0. */
+static int r_R1(struct SN_env * z) {
+    return z->I[0] <= z->c;
+}
+
+/* R2: returns 1 when the cursor is at or beyond mark p2 (z->I[1]), else 0. */
+static int r_R2(struct SN_env * z) {
+    return z->I[1] <= z->c;
+}
+
+static int r_Step_1a(struct SN_env * z) { /* Step 1a: looks up a suffix in
+     * a_1 (6 entries, searched backwards) and rewrites the slice [bra, ket)
+     * according to the entry's result code:
+     *   1 - replace with the 2 symbols of s_4;
+     *   2 - step back one symbol; if that lands on the limit z->lb replace
+     *       with the 2 symbols of s_5, otherwise rewind and replace with the
+     *       single symbol s_6;
+     *   3 - step back one symbol, scan backwards until in_grouping_b(g_v)
+     *       succeeds (giving up with 0 at z->lb), then delete the slice.
+     * Returns 0 when nothing matched or a condition failed, 1 otherwise.
+     * a_1 and s_4..s_6 are defined above this view.
+     */
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0; /* not reachable: zero was already rejected above */
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m; /* first alternative failed: rewind */
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_1b(struct SN_env * z) { /* Step 1b: looks up a suffix in
+     * a_3 (6 entries, searched backwards).
+     *   result 1 - requires r_R1, then replaces the slice with the 2 symbols
+     *              of s_7;
+     *   result 2 - requires that a backward scan finds a position where
+     *              in_grouping_b(g_v) succeeds (cursor restored afterwards),
+     *              deletes the slice, then inspects what now ends the word
+     *              via a_2 (13 entries) without moving the cursor:
+     *                1 - insert the single symbol s_8 at the cursor;
+     *                2 - delete the one symbol before the cursor;
+     *                3 - only if the cursor is exactly at p1 (z->I[0]) and
+     *                    r_shortv holds: insert the single symbol s_9.
+     * Returns 0 as soon as any lookup or condition fails, 1 otherwise.
+     * a_2, a_3 and s_7..s_9 are defined above this view.
+     */
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0; /* not reachable: zero was already rejected above */
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test; /* a test never moves the cursor */
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0; /* not reachable: zero was already rejected above */
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c; /* keep the cursor where it was before the insert */
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1c: working backwards, matches s_10 or (after rewinding) s_11 as the
+ * slice, requires out_grouping_b(g_v) to succeed next and the cursor not to
+ * be at the backward limit z->lb, then replaces the slice with the single
+ * symbol s_12.  Returns 0 if any requirement fails, 1 after the replacement.
+ * s_10..s_12 are defined earlier in the file.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    int from_end;
+
+    z->ket = z->c; /* [, line 83 */
+    from_end = z->l - z->c; /* or, line 83 */
+    if (!eq_s_b(z, 1, s_10)) {
+        z->c = z->l - from_end;
+        if (!eq_s_b(z, 1, s_11))
+            return 0;
+    }
+    z->bra = z->c; /* ], line 83 */
+
+    if (!out_grouping_b(z, g_v, 97, 121))
+        return 0;
+
+    /* not atlimit, line 84: the original's save/restore around this test
+     * wrote back the value the cursor already had, so only the test remains */
+    if (z->c <= z->lb)
+        return 0;
+
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/*
+ * Step 2: looks up a suffix in a_4 (24 entries, searched backwards), requires
+ * r_R1, and then acts on the entry's result code.  Codes 1..12, 14 and 15
+ * replace the slice with a fixed string; 13 additionally requires s_25 to
+ * match before the slice; 16 requires in_grouping_b(g_valid_LI) and deletes
+ * the slice.  Any other non-zero code leaves the word untouched.
+ * Returns 0 when no suffix matched or a requirement failed, 1 otherwise.
+ */
+static int r_Step_2(struct SN_env * z) {
+    symbol * repl = 0; /* replacement chosen by the switch below */
+    int repl_len = 0;
+    int which;
+
+    z->ket = z->c; /* [, line 89 */
+    which = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (which == 0) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+
+    switch (which) {
+        case 1:  repl = s_13; repl_len = 4; break; /* line 90 */
+        case 2:  repl = s_14; repl_len = 4; break; /* line 91 */
+        case 3:  repl = s_15; repl_len = 4; break; /* line 92 */
+        case 4:  repl = s_16; repl_len = 4; break; /* line 93 */
+        case 5:  repl = s_17; repl_len = 3; break; /* line 94 */
+        case 6:  repl = s_18; repl_len = 3; break; /* line 96 */
+        case 7:  repl = s_19; repl_len = 3; break; /* line 98 */
+        case 8:  repl = s_20; repl_len = 2; break; /* line 100 */
+        case 9:  repl = s_21; repl_len = 3; break; /* line 101 */
+        case 10: repl = s_22; repl_len = 3; break; /* line 103 */
+        case 11: repl = s_23; repl_len = 3; break; /* line 105 */
+        case 12: repl = s_24; repl_len = 3; break; /* line 107 */
+        case 13:
+            if (!eq_s_b(z, 1, s_25)) return 0;
+            repl = s_26; repl_len = 2; /* line 108 */
+            break;
+        case 14: repl = s_27; repl_len = 3; break; /* line 109 */
+        case 15: repl = s_28; repl_len = 4; break; /* line 110 */
+        case 16:
+            if (!in_grouping_b(z, g_valid_LI, 99, 116)) return 0;
+            slice_del(z); /* delete, line 111 */
+            return 1;
+        default:
+            return 1; /* no action attached to this result code */
+    }
+
+    slice_from_s(z, repl_len, repl); /* <- */
+    return 1;
+}
+
+/*
+ * Step 3: looks up a suffix in a_5 (9 entries, searched backwards), requires
+ * r_R1, and then: result codes 1..4 replace the slice with s_29..s_32, code 5
+ * deletes it, and code 6 deletes it only when r_R2 also holds.
+ * Returns 0 when no suffix matched or a requirement failed, 1 otherwise.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int which;
+
+    z->ket = z->c; /* [, line 116 */
+    which = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (which == 0) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+
+    if (which == 1) {
+        slice_from_s(z, 4, s_29); /* <-, line 117 */
+    } else if (which == 2) {
+        slice_from_s(z, 3, s_30); /* <-, line 118 */
+    } else if (which == 3) {
+        slice_from_s(z, 2, s_31); /* <-, line 119 */
+    } else if (which == 4) {
+        slice_from_s(z, 2, s_32); /* <-, line 121 */
+    } else if (which == 5) {
+        slice_del(z); /* delete, line 123 */
+    } else if (which == 6) {
+        if (!r_R2(z)) return 0; /* call R2, line 125 */
+        slice_del(z); /* delete, line 125 */
+    }
+    return 1;
+}
+
+/*
+ * Step 4: looks up a suffix in a_6 (18 entries, searched backwards) and
+ * requires r_R2.  Result code 1 deletes the slice; code 2 deletes it only if
+ * s_33 ('s') or, after rewinding, s_34 ('t') matches just before it.
+ * Returns 0 when no suffix matched or a requirement failed, 1 otherwise.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int which;
+
+    z->ket = z->c; /* [, line 130 */
+    which = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (which == 0) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+
+    if (which == 2) {
+        int from_end = z->l - z->c; /* or, line 134 */
+        if (!eq_s_b(z, 1, s_33)) {
+            z->c = z->l - from_end;
+            if (!eq_s_b(z, 1, s_34)) return 0;
+        }
+    }
+    if (which == 1 || which == 2)
+        slice_del(z); /* delete, lines 133 / 134 */
+    return 1;
+}
+
+/*
+ * Step 5: looks up a suffix in a_7 (2 entries, searched backwards).
+ *   result 1 - delete the slice if r_R2 holds, or else if r_R1 holds and
+ *              r_shortv does not;
+ *   result 2 - delete the slice if r_R2 holds and s_35 ('l') matches just
+ *              before it.
+ * Returns 0 when no suffix matched or the conditions fail, 1 otherwise.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int which;
+
+    z->ket = z->c; /* [, line 139 */
+    which = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (which == 0) return 0;
+    z->bra = z->c; /* ], line 139 */
+
+    if (which == 1) {
+        int from_end = z->l - z->c; /* or, line 140 */
+        if (!r_R2(z)) { /* call R2, line 140 */
+            int before_not;
+            z->c = z->l - from_end;
+            if (!r_R1(z)) return 0; /* call R1, line 140 */
+            before_not = z->l - z->c; /* not, line 140 */
+            if (r_shortv(z)) return 0; /* call shortv, line 140 */
+            z->c = z->l - before_not;
+        }
+        slice_del(z); /* delete, line 140 */
+    } else if (which == 2) {
+        if (!r_R2(z)) return 0; /* call R2, line 141 */
+        if (!eq_s_b(z, 1, s_35)) return 0;
+        slice_del(z); /* delete, line 141 */
+    }
+    return 1;
+}
+
+/*
+ * exception2: succeeds (returns 1) when an entry of a_8 (8 entries, searched
+ * backwards) matches and the match leaves the cursor at the backward limit
+ * z->lb; returns 0 otherwise.  ket/bra are set around the match.
+ */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (find_among_b(z, a_8, 8) == 0) /* substring, line 147 */
+        return 0;
+    z->bra = z->c; /* ], line 147 */
+    return z->c <= z->lb; /* atlimit, line 147 */
+}
+
+/*
+ * exception1: forward lookup in a_9 (18 entries) that must end exactly at
+ * z->l.  Result codes 1..11 replace the slice with a fixed string; any other
+ * non-zero code is accepted without changing the word.  Returns 0 when
+ * nothing matched or the match stopped short of z->l, 1 otherwise.
+ */
+static int r_exception1(struct SN_env * z) {
+    /* replacement and its length for result codes 1..11 (index = code - 1) */
+    static symbol * const repl[11] = {
+        s_36, s_37, s_38, s_39, s_40, s_41, s_42, s_43, s_44, s_45, s_46
+    };
+    static const int repl_len[11] = {
+        3, 3, 3, 3, 3, 3, 5, 4, 5, 4, 5
+    };
+    int which;
+
+    z->bra = z->c; /* [, line 159 */
+    which = find_among(z, a_9, 18); /* substring, line 159 */
+    if (which == 0) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+
+    if (which >= 1 && which <= 11)
+        slice_from_s(z, repl_len[which - 1], repl[which - 1]); /* <-, lines 163-176 */
+    return 1;
+}
+
+static int r_postlude(struct SN_env * z) { /* Postlude: does nothing and
+     * returns 0 unless the Y_found flag (z->B[0]) is set.  Otherwise it
+     * repeatedly scans forward for s_47 ('Y') and replaces each occurrence
+     * with s_48 ('y'); when a scan reaches z->l the cursor is put back to
+     * where that scan started and the function returns 1.
+     */
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c; /* cursor at the start of this iteration */
+        while(1) { /* goto, line 192 */
+            int c = z->c; /* shadows the outer c: candidate position being tried */
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0; /* end of input: stop repeating */
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+extern int english_stem(struct SN_env * z) { /* Entry point of the English
+     * stemmer.  Order of work:
+     *   - r_exception1; if it succeeds nothing else runs;
+     *   - otherwise the word must allow a hop of 3 symbols from the cursor,
+     *     else 0 is returned immediately;
+     *   - r_prelude and r_mark_regions, each with the cursor restored after;
+     *   - switch to backward mode (z->lb = cursor, cursor = z->l);
+     *   - r_Step_1a, then either r_exception2 or, if that fails, steps 1b,
+     *     1c, 2, 3, 4 and 5, each with the cursor restored afterwards;
+     *   - cursor back to z->lb, then r_postlude.
+     * Apart from the hop check the return values of the individual steps
+     * are ignored, and the function returns 1.
+     */
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0; /* fewer than 3 symbols available from the cursor */
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5; /* exception2 matched: skip steps 1b..5 */
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb; /* leave backward mode: cursor back to where it began */
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/*
+ * Creates the environment used by english_stem by calling
+ * SN_create_env(0, 2, 1).  The stemmer itself uses z->I[0], z->I[1] and
+ * z->B[0]; presumably the arguments size those arrays -- confirm against
+ * SN_create_env, which is not defined in this file.
+ */
+extern struct SN_env * english_create_env(void)
+{
+    return SN_create_env(0, 2, 1);
+}
+
+/* Releases an environment obtained from english_create_env by passing it to SN_close_env. */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * english_create_env(void); /* returns a new environment for english_stem */
+extern void english_close_env(struct SN_env * z); /* hands the environment to SN_close_env */
+
+extern int english_stem(struct SN_env * z); /* stems the word held in z; see english_stem.c for the return value */
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+/* Integer extremes; INT_MAX and INT_MIN come from <limits.h>. */
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/*
+ * Size in bytes of the two ints stored immediately before a symbol buffer
+ * (see SIZE and CAPACITY below).  Parenthesized so that the macro expands
+ * as a single operand inside larger expressions.
+ */
+#define HEAD (2*sizeof(int))
+
+/* The int just before the buffer holds its size, the one before that its capacity. */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    ((int *)(p))[-2]
+
+struct among /* one row of a table passed to find_among / find_among_b */
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    int (* function)(struct SN_env *); /* per-row routine slot; the stemmer tables visible alongside this header all store 0 here */
+};
+
+extern symbol * create_s(void); /* symbol-buffer helpers; definitions are not part of this header */
+extern void lose_s(symbol * p);
+
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max); /* grouping tests; the _b variants are the ones the stemmers call while working backwards */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+extern int in_range(struct SN_env * z, int min, int max); /* range tests, forward and backward (_b) */
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+extern int eq_s(struct SN_env * z, int s_size, symbol * s); /* string comparisons against a literal (eq_s) or a symbol buffer (eq_v) */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+extern int find_among(struct SN_env * z, struct among * v, int v_size); /* table lookup over v_size rows of struct among; callers treat 0 as "no match" */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s); /* the only prototype here that takes a const symbol pointer */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s); /* slice operations: the stemmers set z->bra / z->ket before calling these */
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s); /* insertions at an explicit bra/ket pair */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int russian_stem(struct SN_env * z); /* public entry point; defined further down the file */
+static int r_tidy_up(struct SN_env * z); /* forward declarations of the file-local routines */
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+extern struct SN_env * russian_create_env(void); /* environment constructor / destructor pair for this stemmer */
+extern void russian_close_env(struct SN_env * z);
+
+static symbol s_0_0[3] = { 215, 219, 201 }; /* strings for table a_0, as numeric symbol codes (presumably a single-byte Cyrillic encoding -- confirm) */
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] = /* searched backwards by r_perfective_gerund; each row is { s_size, s, substring_i, result, function } */
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+static symbol s_1_0[2] = { 192, 192 }; /* strings for table a_1 */
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] = /* searched backwards by r_adjective; every row has result 1 and no substring link */
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+static symbol s_2_0[2] = { 197, 205 }; /* strings for table a_2 */
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] = /* rows are { s_size, s, substring_i, result, function }; the routine that searches this table lies beyond this view */
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+static symbol s_3_0[2] = { 211, 209 }; /* strings for table a_3 */
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] = /* two rows, both with result 1; the routine that searches this table lies beyond this view */
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+static symbol s_4_0[1] = { 192 }; /* strings for table a_4 */
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] = /* rows are { s_size, s, substring_i, result, function }; results are 1 or 2; the routine that searches this table lies beyond this view */
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+static symbol s_5_0[1] = { 192 }; /* strings for table a_5 */
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] = /* rows are { s_size, s, substring_i, result, function }; every result is 1; the routine that searches this table lies beyond this view */
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+static symbol s_6_0[3] = { 207, 211, 212 }; /* strings for table a_6; s_6_1 is s_6_0 plus one trailing symbol */
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] = /* two rows, both with result 1; the routine that searches this table lies beyond this view */
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+static symbol s_7_0[4] = { 197, 202, 219, 197 }; /* strings for table a_7 */
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] = /* results 1, 2 and 3 select different actions; the routine that searches this table lies beyond this view */
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+static unsigned char g_v[] = { 35, 130, 34, 18 }; /* grouping table handed to in_grouping / out_grouping with bounds 192..220 by r_mark_regions (presumably a bitmap over that range -- confirm) */
+
+static symbol s_0[] = { 193 }; /* s_0 and s_1: the two alternatives r_perfective_gerund tests with eq_s_b */
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 }; /* s_2..s_9: referenced by routines beyond this view */
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+static int r_mark_regions(struct SN_env * z) { /* Computes the marks pV
+     * (z->I[0]) and p2 (z->I[1]).  Both start out as z->l.  pV is taken once
+     * a forward scan finds a position where in_grouping(g_v) succeeds; p2 is
+     * taken after a following out_grouping / in_grouping / out_grouping
+     * sequence of scans.  Any scan that reaches z->l jumps to lab0, leaving
+     * the marks not yet assigned at z->l.  The cursor is restored and the
+     * result is always 1.
+     */
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 100 */
+        while(1) { /* gopast, line 101 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+            break;
+        lab1:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[0] = z->c; /* setmark pV, line 101 */
+        while(1) { /* gopast, line 101 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+            break;
+        lab2:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+            break;
+        lab3:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+            break;
+        lab4:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 102 */
+    lab0:
+        z->c = c; /* restore the cursor saved at the top of the do-block */
+    }
+    return 1;
+}
+
+/* R2 test: returns 1 when the cursor is at or beyond the p2 mark kept in
+ * z->I[1], and 0 otherwise.  The environment is not modified. */
+static int r_R2(struct SN_env * z) {
+    return (z->c >= z->I[1]) ? 1 : 0;
+}
+
+/* Try to remove an ending listed in table a_0 (9 entries), searching
+ * backwards from the cursor.
+ *
+ * Result 1: the ending is deleted only if the symbol just before it equals
+ * s_0 or s_1; that preceding symbol is not part of the slice and is kept.
+ * Result 2: the ending is deleted unconditionally.
+ * Returns 1 if a deletion was made, 0 if nothing matched. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 111 */
+    among_var = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 111 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            /* m remembers the cursor so the second alternative starts
+               from the same place as the first */
+            {   int m = z->l - z->c; /* or, line 115 */
+                if (!(eq_s_b(z, 1, s_0))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_1))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 115 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 122 */
+            break;
+    }
+    return 1;
+}
+
+/* Look the text before the cursor up in table a_1 (26 entries) and, on a
+ * match with result 1, delete the matched ending.
+ * Returns 0 when nothing in the table matches, 1 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 127 */
+    among_var = find_among_b(z, a_1, 26); /* substring, line 127 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 127 */
+    if (among_var == 1) {
+        slice_del(z); /* delete, line 136 */
+    }
+    return 1;
+}
+
+/* Remove an adjective ending (r_adjective) and then optionally one more
+ * ending from table a_2 (8 entries).
+ *
+ * Fails (returns 0) only when r_adjective fails.  The second removal is a
+ * "try": when it does not apply the function still returns 1.
+ * For a_2 result 1 the ending is deleted only if preceded by s_2 or s_3;
+ * for result 2 it is deleted unconditionally. */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m; goto lab0; }
+            case 1:
+                {   int m = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m;
+                    /* NOTE(review): the m used on failure below is the inner
+                       (shadowing) one, so the cursor is put back to just
+                       after the a_2 match rather than to the start of the
+                       try -- looks harmless for the caller visible in this
+                       file, which resets the cursor itself; confirm. */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Look the text before the cursor up in table a_3 (2 entries) and, on a
+ * match with result 1, delete the matched ending.
+ * Returns 0 when nothing in the table matches, 1 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 168 */
+    among_var = find_among_b(z, a_3, 2); /* substring, line 168 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 168 */
+    if (among_var == 1) {
+        slice_del(z); /* delete, line 171 */
+    }
+    return 1;
+}
+
+/* Try to remove an ending listed in table a_4 (46 entries), searching
+ * backwards from the cursor.
+ *
+ * Result 1: the ending is deleted only if the symbol just before it equals
+ * s_4 or s_5; that preceding symbol is kept.
+ * Result 2: the ending is deleted unconditionally.
+ * Returns 1 if a deletion was made, 0 if nothing matched. */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 176 */
+    among_var = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 176 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            /* m remembers the cursor so the second alternative starts
+               from the same place as the first */
+            {   int m = z->l - z->c; /* or, line 182 */
+                if (!(eq_s_b(z, 1, s_4))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_5))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 182 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 190 */
+            break;
+    }
+    return 1;
+}
+
+/* Look the text before the cursor up in table a_5 (36 entries) and, on a
+ * match with result 1, delete the matched ending.
+ * Returns 0 when nothing in the table matches, 1 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 199 */
+    among_var = find_among_b(z, a_5, 36); /* substring, line 199 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 199 */
+    if (among_var == 1) {
+        slice_del(z); /* delete, line 206 */
+    }
+    return 1;
+}
+
+/* Delete an ending from table a_6 (2 entries), but only when the start of
+ * the ending satisfies the R2 test.
+ * Returns 0 when nothing matches or the R2 test fails, 1 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 215 */
+    among_var = find_among_b(z, a_6, 2); /* substring, line 215 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 215 */
+    if (r_R2(z) == 0) return 0; /* call R2, line 215 */
+    if (among_var == 1) {
+        slice_del(z); /* delete, line 218 */
+    }
+    return 1;
+}
+
+/* Final clean-up step driven by table a_7 (4 entries).
+ *
+ * Result 1: delete the matched ending; then, if the text now ends in two
+ *           s_6/s_7 symbols, delete the last of them.
+ * Result 2: the matched single symbol is deleted only when the symbol
+ *           before it equals s_8.
+ * Result 3: the matched symbol is deleted unconditionally.
+ * Returns 0 when nothing matches or a required symbol test fails (in case 1
+ * the first deletion has already happened by then), 1 otherwise. */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 223 */
+    among_var = find_among_b(z, a_7, 4); /* substring, line 223 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 223 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 227 */
+            z->ket = z->c; /* [, line 228 */
+            if (!(eq_s_b(z, 1, s_6))) return 0;
+            z->bra = z->c; /* ], line 228 */
+            /* the slice is the last symbol; one more equal symbol must
+               precede it before it is removed */
+            if (!(eq_s_b(z, 1, s_7))) return 0;
+            slice_del(z); /* delete, line 228 */
+            break;
+        case 2:
+            if (!(eq_s_b(z, 1, s_8))) return 0;
+            slice_del(z); /* delete, line 231 */
+            break;
+        case 3:
+            slice_del(z); /* delete, line 233 */
+            break;
+    }
+    return 1;
+}
+
+/* Stem the word held in z, modifying it in place.
+ *
+ * Steps, in order:
+ *   1. r_mark_regions computes the pV and p2 marks.
+ *   2. The text is processed backwards from its end, with the backward
+ *      limit temporarily raised to pV (z->I[0]).
+ *   3. Either r_perfective_gerund succeeds, or r_reflexive is tried and
+ *      then the first of r_adjectival / r_verb / r_noun that succeeds.
+ *   4. A trailing s_9 symbol is deleted if present.
+ *   5. r_derivational and r_tidy_up are each attempted once.
+ * Returns 0 only if the cursor is before pV when the limit is set;
+ * otherwise restores the limit, moves the cursor to it and returns 1. */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        /* m3 keeps the old backward limit; it is put back at the end */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            /* "do" always ends with the cursor back where it started,
+               measured from the (possibly shortened) end of the text */
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3;
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create a stemmer environment for this stemmer by calling
+ * SN_create_env(0, 2, 0).
+ * NOTE(review): the 2 presumably sizes the integer array z->I, of which this
+ * file uses I[0] and I[1] -- confirm against SN_create_env. */
+extern struct SN_env * russian_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 0);
+    return env;
+}
+
+/* Dispose of an environment by handing it to SN_close_env. */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create / dispose of the environment that russian_stem operates on. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Stem the word held in z in place; see russian_stem.c for the result code. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* `unless (C) stmt` runs stmt when C is false. */
+#define unless(C) if(!(C))
+
+/* Capacity and size, in symbols, that create_s() records for a new buffer. */
+#define CREATE_SIZE 1
+
+/* Allocate a fresh symbol buffer.
+ * The returned pointer lies HEAD bytes past the start of the malloc'd
+ * region; capacity and size are both recorded as CREATE_SIZE.
+ * Release the buffer with lose_s().
+ * NOTE(review): the malloc result is not checked for NULL. */
+extern symbol * create_s(void)
+{
+    char * mem = (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    symbol * p = (symbol *) (HEAD + mem);
+
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Free a buffer obtained from create_s() or increase_size(): step back over
+ * the HEAD bytes in front of p to reach the address malloc returned. */
+extern void lose_s(symbol * p)
+{
+    char * mem = (char *) p - HEAD;
+    free(mem);
+}
+
+/* Forward test: if the symbol at the cursor lies in [min, max] and its bit
+ * (index ch - min) is set in bitmap s, advance the cursor and return 1.
+ * Returns 0, leaving the cursor alone, at end of text or on a mismatch. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: if the symbol just before the cursor lies in [min, max]
+ * and its bit (index ch - min) is set in bitmap s, move the cursor back one
+ * and return 1.  Returns 0, leaving the cursor alone, at the backward limit
+ * or on a mismatch. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test, complement of in_grouping: advance the cursor and return 1
+ * when the symbol at the cursor is outside [min, max] or its bit is clear
+ * in bitmap s.  Returns 0, leaving the cursor alone, at end of text or when
+ * the symbol is in the group. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max) {
+        ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward test, complement of in_grouping_b: move the cursor back one and
+ * return 1 when the symbol just before the cursor is outside [min, max] or
+ * its bit is clear in bitmap s.  Returns 0, leaving the cursor alone, at the
+ * backward limit or when the symbol is in the group. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max) {
+        ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward test: advance the cursor and return 1 when the symbol at the
+ * cursor lies in [min, max]; otherwise (or at end of text) return 0. */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch < min || ch > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: move the cursor back one and return 1 when the symbol just
+ * before the cursor lies in [min, max]; otherwise (or at the backward
+ * limit) return 0. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch < min || ch > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: advance the cursor and return 1 when the symbol at the
+ * cursor lies outside [min, max]; return 0 at end of text or when it lies
+ * inside the range. */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch >= min && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: move the cursor back one and return 1 when the symbol just
+ * before the cursor lies outside [min, max]; return 0 at the backward limit
+ * or when it lies inside the range. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch >= min && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward literal match: when the s_size symbols at s equal the text
+ * starting at the cursor, advance the cursor past them and return 1.
+ * Returns 0 when fewer than s_size symbols remain or the text differs. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->l - z->c < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward literal match: when the s_size symbols at s equal the text
+ * ending at the cursor, move the cursor back over them and return 1.
+ * Returns 0 when fewer than s_size symbols lie between the backward limit
+ * and the cursor, or the text differs. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->c - z->lb < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward match against a whole symbol buffer: eq_s with the buffer's
+ * recorded SIZE as the length. */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s(z, len, p);
+}
+
+/* Backward match against a whole symbol buffer: eq_s_b with the buffer's
+ * recorded SIZE as the length. */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s_b(z, len, p);
+}
+
+/* Search table v (v_size entries) for an entry whose string matches the
+ * text starting at the cursor.
+ *
+ * The first loop is a binary search that narrows [i, j) by comparing the
+ * text with each probed entry, remembering in common_i / common_j how many
+ * leading symbols are already known to agree so they are not compared again.
+ * NOTE(review): this presumes the entries are sorted by their strings --
+ * the ordering is not checked here.
+ *
+ * The second loop starts at entry i and follows substring_i links until it
+ * finds an entry whose whole string was matched.  For that entry the cursor
+ * is placed just past the match; if the entry has a function it is called
+ * and must return non-zero for the entry to be accepted.
+ *
+ * Returns the accepted entry's result, or 0 when no entry is accepted. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the function may have moved the cursor; put it back */
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply.
+ *
+ * Entry strings are compared from their last symbol towards their first
+ * against the text ending at the cursor, never reading below z->lb.  On
+ * acceptance the cursor is placed just before the matched text and the
+ * entry's result is returned; 0 is returned when no entry is accepted. */
+
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the function may have moved the cursor; put it back */
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Replace buffer p by a new one with capacity n + 20 symbols.
+ * CAPACITY(p) symbols are copied across and p is released with lose_s();
+ * the caller must use the returned pointer from then on.  The size field of
+ * the new buffer is not set here.
+ * NOTE(review): the malloc result is not checked for NULL. */
+extern symbol * increase_size(symbol * p, int n)
+{
+    int new_size = n + 20;
+    char * mem = (char *) malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    symbol * q = (symbol *) (HEAD + mem);
+
+    CAPACITY(q) = new_size;
+    memmove(q, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return q;
+}
+
+/* to replace symbols between c_bra and c_ket in z->p by the
+   s_size symbols at s
+
+   Returns the change in text length (s_size minus the number of symbols
+   replaced).  When the length changes, the buffer is enlarged if needed
+   (z->p may then point to a new buffer), the text after c_ket is shifted,
+   and SIZE(z->p) and z->l are updated.  The cursor moves with the shifted
+   text when it was at or after c_ket, and is pulled back to c_bra when it
+   was strictly inside the replaced span.  z->bra and z->ket are not
+   touched here.
+*/
+
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{   int adjustment = s_size - (c_ket - c_bra);
+    int len = SIZE(z->p);
+    if (adjustment != 0)
+    {   if (adjustment + len > CAPACITY(z->p)) z->p = increase_size(z->p, adjustment + len);
+        /* move the tail first; source and destination may overlap */
+        memmove(z->p + c_ket + adjustment, z->p + c_ket, (len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, adjustment + len);
+        z->l += adjustment;
+        if (z->c >= c_ket) z->c += adjustment; else
+            if (z->c > c_bra) z->c = c_bra;
+    }
+    /* s may be a null pointer when s_size is 0, so skip the copy then */
+    unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return adjustment;
+}
+
+/* Sanity check before a slice operation: requires
+ * 0 <= bra <= ket <= l <= SIZE(p).
+ * On violation a message goes to stderr, debug() dumps the state, and the
+ * whole process is terminated with exit(1). */
+static void slice_check(struct SN_env * z)
+{
+    if (!(0 <= z->bra &&
+          z->bra <= z->ket &&
+          z->ket <= z->l &&
+          z->l <= SIZE(z->p)))   /* this line could be removed */
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+/* Replace the current slice [z->bra, z->ket) by the s_size symbols at s,
+ * after validating the slice with slice_check(). */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the whole contents of symbol buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    slice_from_s(z, len, p);
+}
+
+/* Delete the current slice: replace it by zero symbols. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, 0);
+}
+
+/* Replace the text between bra and ket by the s_size symbols at s, then
+ * shift the slice marks z->bra / z->ket by the length change when they lie
+ * at or after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* Same as insert_s, taking the replacement text and its length from symbol
+ * buffer p. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice into symbol buffer p, enlarging p when its
+ * capacity is too small.  Returns the buffer to use afterwards, which may
+ * differ from the p passed in. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int len;
+
+    slice_check(z);
+    len = z->ket - z->bra;
+    if (len > CAPACITY(p)) p = increase_size(p, len);
+    memmove(p, z->p + z->bra, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Copy the first z->l symbols of the text into symbol buffer p, enlarging p
+ * when its capacity is too small.  Returns the buffer to use afterwards,
+ * which may differ from the p passed in. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int len = z->l;
+
+    if (len > CAPACITY(p)) p = increase_size(p, len);
+    memmove(p, z->p, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Print the text of z to stdout with position markers:
+ *   {  backward limit (lb)     [  slice start (bra)    |  cursor (c)
+ *   ]  slice end (ket)         }  forward limit (l)
+ * Zero symbols are shown as '#'.  When number >= 0 the line is prefixed
+ * with number, line_count and the buffer size; when number is negative the
+ * prefix (including the opening quote) is omitted but the closing quote is
+ * still printed. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{   int i;
+    int limit = SIZE(z->p);
+    /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+    /* runs to i == limit inclusive so markers at the very end are shown */
+    for (i = 0; i <= limit; i++)
+    {   if (z->lb == i) printf("{");
+        if (z->bra == i) printf("[");
+        if (z->c == i) printf("|");
+        if (z->ket == i) printf("]");
+        if (z->l == i) printf("}");
+        if (i < limit)
+        {   int ch = z->p[i];
+            if (ch == 0) ch = '#';
+            printf("%c", ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+/* size in bytes of the line buffer readstoplist() passes to fgets() */
+#define STOPBUFLEN 4096
+
+/*
+ * Convert a NUL-terminated string to lower case in place with tolower()
+ * and return the same pointer.
+ */
+char*
+lowerstr(char *str) {
+   unsigned char *cur;
+
+   for (cur = (unsigned char*)str; *cur != '\0'; cur++)
+       *cur = tolower(*cur);
+   return str;
+}
+
+/*
+ * Release a stop list: free up to s->len words (stopping early at a NULL
+ * slot), free the pointer array, then zero the whole StopList structure
+ * (this also clears s->wordop).
+ *
+ * The array is freed exactly once, after the word loop.  Freeing it inside
+ * the loop would free it once per word and read it after it was released,
+ * and would never free it for an empty list.
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       /* test len first so no slot past the last word is ever read */
+       while( s->len >0 && *ptr ) {
+           free(*ptr);
+           ptr++; s->len--;
+       }
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Free the first len words collected so far and the array holding them.
+ * Used by readstoplist() when it has to give up part-way through a file.
+ */
+static void
+discardstopwords(char **stop, int len) {
+   while( len>0 )
+       free(stop[--len]);
+   free(stop);
+}
+
+/*
+ * Load a stop-word list from the file named by the text value 'in'.
+ *
+ * Each non-empty line becomes one malloc'ed word; if s->wordop is set it
+ * is applied to every word as it is stored.  On return s->stop points to
+ * the array of words and s->len holds their count.  When 'in' is NULL or
+ * empty the list is left empty (s->stop = NULL, s->len = 0).
+ *
+ * Reports elog(ERROR) if the file cannot be opened or memory runs out; in
+ * the latter case the words read so far are released first.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t  buflen=strlen(buf);
+
+           /*
+            * Strip the newline only when there is one: the last line of
+            * a file may lack it, and must not lose its final character.
+            */
+           if ( buflen>0 && buf[buflen-1]=='\n' )
+               buf[buflen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /*
+                    * The words live in the local array, not yet in s:
+                    * free them here, then let freestoplist() reset *s.
+                    */
+                   discardstopwords(stop, s->len);
+                   s->stop=NULL;
+                   freestoplist(s);
+                   fclose(hin);
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               /* same clean-up as above; slot s->len itself is NULL */
+               discardstopwords(stop, s->len);
+               s->stop=NULL;
+               freestoplist(s);
+               fclose(hin);
+               elog(ERROR,"Not enough memory");
+           }
+           if ( s->wordop )
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++;
+       }
+       fclose(hin);
+       pfree(filename);
+   }
+   s->stop=stop;
+}
+
+/*
+ * qsort/bsearch comparator: a and b each point to a char* element;
+ * the strings they refer to are ordered with strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   const char *left = *(char**)a;
+   const char *right = *(char**)b;
+
+   return strcmp(left, right);
+}
+
+/*
+ * Sort the words of a stop list with comparestr so that searchstoplist()
+ * can use bsearch().  Does nothing for an empty list.
+ */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true if key is in the stop list, false otherwise.
+ * When s->wordop is set it is applied to key first.  The lookup uses
+ * bsearch() with comparestr.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop )
+       key=(*(s->wordop))(key);
+   if ( !s->stop || s->len<=0 )
+       return false;
+   return ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) != NULL ) ? true : false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <locale.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the tsearch configuration whose pg_ts_cfg row has OID id.
+ *
+ * The function
+ *   1. zeroes *cfg and reads the configuration's parser name through SPI;
+ *   2. reads the (token type id, dictionary name array) pairs for that
+ *      configuration, ordered by token id descending, so the first row
+ *      gives the size of cfg->map;
+ *   3. after SPI_finish(), resolves the parser name and every dictionary
+ *      name to an OID via name2id_prs() / name2id_dict().
+ *
+ * cfg->map and the per-token dict_id arrays are malloc()ed.  Names are
+ * copied with ptextdup() while TopMemoryContext is current and are
+ * pfree()d once they have been turned into OIDs.
+ *
+ * Errors are reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer(
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
+       );
+       /* keep a copy of the parser name that outlives the SPI call */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+
+       cfg->id=id;
+   } else
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second query takes (parser name, cfg oid) */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       /* isnull now describes column 2, the dictionary name array */
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       if ( !cfg->map ) {
+           /* rows arrive with the largest token id first: size the map from it */
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)DatumGetPointer( PG_DETOAST_DATUM( PointerGetDatum(toasted_a) ) );
+
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* dict_id temporarily holds pointers to copied names, not OIDs */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       }
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a )
+           pfree(a);
+   }
+
+   SPI_finish();
+
+   /* name -> OID resolution happens only after the SPI session is closed */
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Backend-local cache of loaded tsearch configurations.
+ */
+typedef struct {
+   TSCfgInfo   *last_cfg;  /* entry returned by the most recent findcfg() */
+   int     len;            /* number of used entries in list */
+   int     reallen;        /* number of allocated entries in list */
+   TSCfgInfo   *list;      /* realloc()ed array, kept sorted by id in findcfg() */
+   SNMap       name2id_map;    /* configuration name -> OID cache used by name2id_cfg() */
+} CFGList;
+
+/* the single cache instance, starts out empty */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name -> OID map, every
+ * per-token dictionary array, every map array and the list itself.
+ * CList is left zeroed, i.e. in its initial empty state.
+ */
+void
+reset_cfg(void) {
+        freeSNMap( &(CList.name2id_map) );
+        if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+                free(CList.list);
+   }
+        memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is
+ * unsigned, so a difference squeezed into int has the wrong sign when
+ * the two values are far apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((TSCfgInfo*)a)->id;
+   Oid idb = ((TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached TSCfgInfo for configuration id, loading it with
+ * init_cfg() on first use.
+ *
+ * Lookup order: the entry returned last time, then a bsearch() over the
+ * sorted cache, then a fresh load appended to the array followed by a
+ * re-sort.
+ *
+ * CList.last_cfg is cleared before the load.  init_cfg() reports errors
+ * through ts_error(ERROR, ...) and may already have stored id in the
+ * slot by then; clearing the pointer keeps such a half-built slot (which
+ * is not counted in CList.len) from being handed out as a cache hit on
+ * the next call, and avoids keeping a pointer into an array that
+ * realloc() may have moved.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo key;
+   TSCfgInfo *slot;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   key.id=id;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance: load it, growing the array when it is full */
+   CList.last_cfg = NULL;
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp )
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+   slot=&(CList.list[CList.len]);
+   init_cfg(id, slot);
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+
+   /* qsort changed order, so locate the new entry again */
+   CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return CList.last_cfg;
+}
+
+
+/*
+ * Translate a configuration name into the OID of its pg_ts_cfg row.
+ *
+ * Answers are remembered in CList.name2id_map; only a name that is not
+ * yet in the map causes an SPI query.  A missing row or a NULL oid is
+ * reported with elog(ERROR).
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid cfgid;
+
+   /* fast path: already resolved earlier */
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid )
+       return cfgid;
+
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( !plan_name2id )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed <= 0 )
+       elog(ERROR, "No tsearch config");
+
+   cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull )
+       elog(ERROR, "Null id for tsearch config");
+   SPI_finish();
+
+   /* remember the answer for the next lookup */
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Split buf into tokens with the configuration's parser, normalize each
+ * token with the dictionaries mapped to its type, and append the
+ * resulting lexemes to prs->words.
+ *
+ * For every token the dictionaries in cfg->map[type] are tried in
+ * order; the first one that returns a non-NULL array wins.  Each
+ * returned string becomes a WORD that takes over the string pointer
+ * (only the array holding the pointers is pfree()d).  prs->pos is
+ * advanced once per token that some dictionary recognized, including
+ * when the returned array is empty.
+ *
+ * Tokens whose type has no slot in cfg->map are skipped.  A token of
+ * MAXSTRLEN bytes or more raises an error.
+ */
+void
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   /* start the parser on the input buffer */
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* a token type of 0 ends the token stream */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           /* the length is an integer argument, so pass it as int4 */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   /* shut the parser down */
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one raw token (buflen bytes at buf, token type `type`) to the
+ * headline word array, doubling the array until there is room.  The new
+ * entry is zeroed first, so all of its flags start cleared, and it gets
+ * its own palloc()ed copy of the token bytes (no terminating NUL).
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+
+   prs->curwords++;
+}
+
+/*
+ * Attach the query items that equal the normalized lexeme buf/buflen to
+ * the most recently added headline word.
+ *
+ * The first matching item is stored in that word's `item` field.  Each
+ * further match appends a copy of the word, marked `repeated`, that
+ * carries the additional item; the copy shares the original's `word`
+ * pointer.
+ *
+ * The array is grown up front so that query->size extra entries fit.
+ * `word` is taken only after that growth, because repalloc() may move
+ * the array and a pointer taken earlier would dangle.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* the word added last, i.e. the token this lexeme came from */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) {
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               /* word already has an item: record this one on a duplicate entry */
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else
+               word->item=item;
+       }
+       item++;
+   }
+}
+
+/*
+ * Headline variant of text parsing: every token the parser returns is
+ * recorded verbatim with hladdword(), then normalized with the
+ * dictionaries mapped to its type, and each normalized form is matched
+ * against the query with hlfinditem().
+ *
+ * Unlike parsetext_v2(), the normalized strings are not kept: each one
+ * is pfree()d right after matching, followed by the array that held
+ * them.  Tokens whose type has no slot in cfg->map are still recorded
+ * but not normalized.  A token of MAXSTRLEN bytes or more raises an
+ * error.
+ */
+void
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   /* start the parser on the input buffer */
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* a token type of 0 ends the token stream */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len )
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           /* the length is an integer argument, so pass it as int4 */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   /* shut the parser down */
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text value from the word array in prs.
+ *
+ * Only words flagged `in` and neither `skip` nor `repeated` are emitted.
+ * A word flagged `replace` is emitted as a single space; any other word
+ * is copied verbatim and, when flagged `selected`, wrapped in
+ * prs->startsel / prs->stopsel.
+ *
+ * The word buffers of all non-repeated entries are pfree()d on the way,
+ * so prs->words must not be read through `word` afterwards.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   text *out;
+   int len=128;
+   char *ptr;
+   HLWORD  *wrd=prs->words;
+
+   out = (text*)palloc( len );
+   ptr=((char*)out) + VARHDRSZ;
+
+   while( wrd - prs->words < prs->curwords ) {
+       /* grow until the word plus both markers fit; ptr is rebased because repalloc may move out */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
+           int dist = ptr - ((char*)out);
+           len*= 2;
+           out = (text *) repalloc(out, len);
+           ptr=((char*)out) + dist;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace ) {
+               *ptr=' ';
+               ptr++;
+           } else {
+               if (wrd->selected) {
+                   memcpy(ptr,prs->startsel,prs->startsellen);
+                   ptr+=prs->startsellen;
+               }
+               memcpy(ptr,wrd->word,wrd->len);
+               ptr+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(ptr,prs->stopsel,prs->stopsellen);
+                   ptr+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries share their word pointer with the original (see hlfinditem), so free it once */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+
+       wrd++;
+   }
+
+   /* total size, counted from the start of out (header included) */
+   VARATT_SIZEP(out)=ptr - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the id of the current tsearch configuration.
+ *
+ * Once a value has been determined (here or by set_curcfg) it is kept
+ * in current_cfg_id and returned directly.  Otherwise the configuration
+ * is looked up in pg_ts_cfg by the current LC_CTYPE locale name; if no
+ * row matches, an error is raised.
+ */
+int
+get_currcfg(void) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1];
+   const char *localename;
+   bool isnull;
+   int rc;
+
+   /* already known */
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* passing NULL only queries the locale, it does not change it */
+   localename = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)localename) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed <= 0 )
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given OID the current
+ * one.  findcfg() runs first, so an OID that cannot be loaded raises an
+ * error before current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+        Oid cfgid=PG_GETARG_OID(0);
+
+        findcfg(cfgid);
+        current_cfg_id=cfgid;
+        PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current
+ * one.  The name is resolved with name2id_cfg() and the rest is
+ * delegated to set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+        Oid cfgid=name2id_cfg(name);
+
+        DirectFunctionCall1( set_curcfg, ObjectIdGetDatum( cfgid ) );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable: return the id of the current configuration, as
+ * determined by get_currcfg().
+ */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   int cfgid=get_currcfg();
+
+   PG_RETURN_OID( cfgid );
+}
+
+/*
+ * SQL-callable: announce that the tsearch caches were cleaned.
+ *
+ * NOTE(review): this function itself only calls ts_error() with a
+ * NOTICE; the actual cache reset is presumably done inside ts_error()
+ * (defined elsewhere) -- confirm before relying on it.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries mapped to one token type, in the order they are tried. */
+typedef struct {
+   int len;            /* number of entries in dict_id */
+   Datum   *dict_id;   /* dictionary OIDs stored as Datums (malloc()ed in init_cfg) */
+} ListDictionary;
+
+/* One loaded tsearch configuration. */
+typedef struct {
+   Oid id;             /* OID of the pg_ts_cfg row */
+   Oid prs_id;         /* id of the parser, as returned by name2id_prs() */
+   int len;            /* number of entries in map */
+   ListDictionary  *map;   /* indexed by token type id */
+}  TSCfgInfo;
+
+/* configuration cache interface (ts_cfg.c) */
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalized lexeme produced by parsetext_v2(). */
+typedef struct {
+        uint16          len;    /* length of word in bytes */
+   union {
+       uint16      pos;        /* single position, set by parsetext_v2() */
+       uint16      *apos;      /* NOTE(review): presumably an array of positions used by later processing -- confirm */
+   } pos;
+        char       *word;       /* lexeme text, owned by this entry */
+   uint32  alen;               /* set to 0 by parsetext_v2(); NOTE(review): likely the allocated size of apos -- confirm */
+}       WORD;
+   
+/* Growable array of WORDs plus the running token position. */
+typedef struct {
+        WORD       *words;      /* palloc()ed array */
+        int4            lenwords;   /* allocated number of entries */
+        int4            curwords;   /* used number of entries */
+   int4        pos;             /* incremented for each token a dictionary recognized */
+}       PRSTEXT;
+
+/* One token of the text a headline is generated from. */
+typedef struct {
+        uint16    len;          /* length of word in bytes */
+   uint8    selected:1,         /* genhl(): wrap in startsel/stopsel */
+         in:1,                  /* genhl(): word is part of the headline */
+         skip:1,                /* genhl(): do not output */
+         replace:1,             /* genhl(): output a single space instead */
+         repeated:1;            /* duplicate entry added by hlfinditem(), shares word pointer */
+   uint8   type;                /* parser token type */
+        char      *word;        /* raw token bytes, not NUL-terminated */
+   ITEM      *item;             /* matching query item, or NULL */
+}       HLWORD;
+   
+/* Growable array of HLWORDs plus the highlight markers. */
+typedef struct {
+        HLWORD       *words;    /* palloc()ed array */
+        int4            lenwords;   /* allocated number of entries */
+        int4            curwords;   /* used number of entries */
+        char           *startsel;   /* text emitted before a selected word */
+        char            *stopsel;   /* text emitted after a selected word */
+        int2            startsellen;    /* length of startsel in bytes */
+        int2            stopsellen;     /* length of stopsel in bytes */
+}       HLPRSTEXT;
+
+/* headline support */
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+/* text -> lexeme list, and current-configuration lookup */
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+/*
+ * Input function of the tsstat type.  The input text is not looked at:
+ * the result is always an empty statistic (header only, zero entries).
+ */
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty=palloc(STATHDRSIZE);
+   empty->size=0;
+   empty->len=STATHDRSIZE;
+   PG_RETURN_POINTER(empty);
+}
+
+/*
+ * Output function of the tsstat type.  Not implemented: always raises
+ * an error.  The return statement after elog(ERROR) only satisfies the
+ * function signature.
+ */
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * Grow (or create) an array of WordEntry pointers.
+ *
+ * With no array yet, or a recorded length of 0, a fresh array of 8
+ * slots is allocated; otherwise the existing array is doubled.  *len is
+ * updated to the new number of slots and the (possibly moved) array is
+ * returned.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*)* (*len) );
+   }
+
+   *len=8;
+   return palloc( sizeof(WordEntry*)* (*len) );
+}
+
+/*
+ * Compare a statistic entry with a tsvector word.
+ *
+ * Shorter strings sort first; strings of equal length are ordered by
+ * strncmp() over their bytes.  Returns <0, 0 or >0 accordingly.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(
+       STATSTRPTR(stat) + a->pos,
+       STRPTR(txt) + b->pos,
+       a->len
+   );
+}
+
+/*
+ * Build a new tsstat holding every entry of stat plus the len words of
+ * txt listed in entry[].
+ *
+ * The caller (ts_accum) passes in entry[] only words that are not yet
+ * present in stat, in txt's own order.  The new entry array is kept in
+ * compareStatWord() order; the text of the new words is appended after
+ * the existing string area, so the `pos` offsets of old entries stay
+ * valid.
+ *
+ * A single new word is placed by binary search; several new words are
+ * merged with the old entries in one pass.  Each new entry starts with
+ * ndoc = 1 and nentry = number of positions of the word in txt (at
+ * least 1).
+ *
+ * stat itself is not modified or freed.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* bytes needed for the text of the new words */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings first, new ones are appended at curptr */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* binary search for the insertion point; the entry array ends where the strings begin */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr;
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       /* entries before the insertion point */
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1;
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       /* entries after the insertion point */
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* merge old entries and new words */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1;
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries (at most one of the two tails is non-empty) */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) );
+
+       /* whatever is left of the new words */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1;
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+}
+
+/*
+ * Accumulate one tsvector (argument 1) into a tsstat (argument 0) and
+ * return the resulting tsstat.
+ *
+ * Words already present in the statistic get ndoc incremented and
+ * nentry increased by their number of positions (at least 1) directly
+ * in the passed-in stat.  Words not yet present are collected and, if
+ * there are any, a new tsstat containing them is built with formstat()
+ * and returned instead; the old stat is not freed here.
+ *
+ * A missing stat is replaced by an empty one; a missing or empty
+ * tsvector returns stat unchanged.
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;   /* allocated / used slots of newentry */
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /* choose between one linear merge pass and a binary search per word */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word sorts before the current entry: it is new */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* words beyond the last stat entry are all new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           /* the entry array ends where the string area begins */
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* the loop ran to exhaustion instead of breaking on a match */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions below.
+ */
+typedef struct {
+   uint32  cur;        /* index of the next entry to return */
+   tsvector *stat;     /* declared as tsvector, but ts_setup_firstcall() stores a copy of a tsstat here; accessed via STATPTR/STATSTRPTR */
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions: copy stat
+ * into the multi-call memory context, remember it together with a
+ * cursor starting at 0 in funcctx->user_fctx, and prepare the slot and
+ * attribute metadata for rows of the "statinfo" relation type.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext     oldcontext;
+   StatStorage     *st;
+   TupleDesc            tupdesc;
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* private copy of the statistic, valid across calls */
+   st=palloc( sizeof(StatStorage) );
+   st->stat=palloc( stat->len );
+   memcpy(st->stat, stat, stat->len);
+   st->cur=0;
+   funcctx->user_fctx = (void*)st;
+
+   /* result row description */
+   tupdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+
+   MemoryContextSwitchTo(oldcontext);
+}
+
+
+/*
+ * Produce the next result row (word, ndoc, nentry) from the stored
+ * statistic, or (Datum)0 when every entry has been returned.  On
+ * exhaustion the stored statistic and the state struct are freed.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage     *st=(StatStorage*)funcctx->user_fctx;
+   StatEntry *entry;
+   HeapTuple    tuple;
+   Datum result;
+   char* values[3];
+   char    ndoc[16];
+   char    nentry[16];
+
+   /* nothing left: release the state and signal the end */
+   if ( st->cur >= st->stat->size ) {
+       pfree(st->stat);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry=STATPTR(st->stat) + st->cur;
+
+   /* the stored word is not NUL-terminated, make a C string of it */
+   values[0]=palloc( entry->len+1 );
+   memcpy( values[0], STATSTRPTR(st->stat)+entry->pos, entry->len);
+   (values[0])[entry->len]='\0';
+
+   sprintf(ndoc,"%d",entry->ndoc);
+   values[1]=ndoc;
+   sprintf(nentry,"%d",entry->nentry);
+   values[2]=nentry;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[0]);
+   st->cur++;
+   return result;
+}
+
+/*
+ * Set-returning function: turn an accumulated tsstat (argument 0) into
+ * rows, one per entry.
+ */
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+Datum
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   result = ts_process_call(funcctx);
+   if ( result == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+/* cached OID of the tsvector type, InvalidOid until looked up */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the OID of the type named 'tsvector' in pg_type and store it
+ * in tiOid.  Must be called inside an open SPI connection.
+ *
+ * Raises an error when the query fails, returns no row, or yields
+ * InvalidOid.  The "no row" test is SPI_processed<1: the previous <0
+ * test did not catch an empty result, so vals[0] of an empty result
+ * would have been read.
+ */
+static void
+get_ti_Oid(void) {
+   int ret;
+   bool isnull;
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL query given in txt and accumulate every non-NULL value of
+ * its single tsvector column into one tsstat, which is returned.
+ *
+ * The query is executed through an SPI cursor and fetched 100 rows at a
+ * time, so the caller must have an SPI connection open.  Each value is
+ * fed to ts_accum(); when that returns a new tsstat the previous one is
+ * freed.
+ *
+ * Raises an error if the query cannot be prepared or opened, does not
+ * return exactly one column, or the column is not of type tsvector.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid )
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   /* first batch; its tuple descriptor is used to validate the result shape */
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from an empty statistic */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum returns its input when no new word was added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       }
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);
+   }
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;
+}
+
+/*
+ * Set-returning function: run the query given as text (argument 0),
+ * gather word statistics over its tsvector column with ts_stat_sql(),
+ * and return them one row per word.
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+Datum
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *txt=PG_GETARG_TEXT_P(0);
+       tsstat *stat;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+
+       /* all the querying happens once, inside a single SPI session */
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0);
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   result = ts_process_call(funcctx);
+   if ( result == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/* Statistics for one distinct word. */
+typedef struct {
+   uint32  len;    /* length of the word in bytes */
+   uint32  pos;    /* offset of the word inside the string area (see STATSTRPTR) */
+   uint32  ndoc;   /* incremented once per accumulated tsvector containing the word */
+   uint32  nentry; /* sum of the word's position counts, each counted as at least 1 */
+}  StatEntry;
+
+/*
+ * The tsstat value: header, then `size` StatEntry structs, then the
+ * string area holding the words back to back.
+ */
+typedef struct {
+   int4        len;    /* total size in bytes, header included */
+   int4        size;   /* number of StatEntry structs */
+   char        data[1];    /* start of the variable-length part */
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat.  Every macro argument is parenthesized so
+ * that expression arguments (e.g. "a + b" or "cond ? p : q") expand
+ * correctly.
+ *
+ * STATHDRSIZE        size of the fixed header (len + size)
+ * CALCSTATSIZE(x,l)  total bytes for x entries and l bytes of strings
+ * STATPTR(x)         first StatEntry of x
+ * STATSTRPTR(x)      start of the string area of x
+ * STATSTRSIZE(x)     number of bytes in the string area of x
+ *
+ * NOTE(review): STATSTRPTR/STATSTRSIZE read len and size through a
+ * tsvector* cast; this presumably relies on tsvector starting with the
+ * same two int4 fields as tsstat -- confirm against tsvector.h.
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsvector*)(x))->size ) )
+#define STATSTRSIZE(x) ( ((tsvector*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsvector*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+-- Dictionary registry: one row per dictionary, keyed by dict_name.
+--   dict_init       oid (from pg_proc) of the dictionary's init function; nullable
+--   dict_initoption text stored with the dictionary, e.g. a stop-word file path;
+--                   presumably handed to the init function - TODO confirm
+--   dict_lexize     oid (from pg_proc) of the dictionary's lexize function
+--   dict_comment    free-text description
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+-- Parser registry: one row per parser, keyed by prs_name.  Each oid column
+-- holds the pg_proc oid of one parser callback (see the 'default' row
+-- inserted further down in this script):
+--   prs_start      start function      (prsd_start for the default parser)
+--   prs_nexttoken  next-token function (prsd_getlexeme)
+--   prs_end        end function        (prsd_end)
+--   prs_headline   headline function   (prsd_headline)
+--   prs_lextype    token-type listing  (prsd_lextype)
+--   prs_comment    free-text description
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance ranking
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * a string of lexeme values plus an array giving each lexeme's position in that string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>     /* tolower, isdigit, isspace */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator for WordEntryPos: ascending by ->pos.
+ * Returns 0 for equal positions so the ordering is consistent, as the
+ * qsort() contract requires (weights are not part of the sort key;
+ * uniquePos() merges equal positions afterwards).
+ */
+static int 
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *left = (const WordEntryPos *) a;
+   const WordEntryPos *right = (const WordEntryPos *) b;
+
+   if ( left->pos == right->pos )
+       return 0;
+   return ( left->pos > right->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ *
+ * When several items share a position, the highest weight among them
+ * is kept.  Compaction stops early once the kept item's index reaches
+ * MAXNUMPOS-1 or a kept position equals MAXENTRYPOS-1.
+ *
+ * Returns the number of positions left at the front of a[].  An empty
+ * or single-element array is returned unchanged (0 or 1).
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *ptr, *res;
+
+   res=a;
+   /* nothing to sort or merge; also keeps l==0 from reporting 1 item */
+   if (l<=1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   ptr = a + 1;
+   while (ptr - a < l) {
+       if ( ptr->pos != res->pos ) {
+           /* new distinct position: append it after the last kept one */
+           res++;
+           res->pos = ptr->pos;
+           res->weight = ptr->weight;
+           if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
+               break;
+       } else if ( ptr->weight > res->weight )
+           /* duplicate position: remember the strongest weight */
+           res->weight = ptr->weight;
+       ptr++;
+   }
+   return res + 1 - a;
+}
+
+static char *BufferStr;
+/*
+ * qsort() comparator for WordEntryIN: shorter lexemes sort first,
+ * equal-length lexemes are ordered by strncmp() on their bytes, which
+ * are looked up in the file-level BufferStr via entry.pos.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *lhs = (const WordEntryIN *) a;
+   const WordEntryIN *rhs = (const WordEntryIN *) b;
+
+   if ( lhs->entry.len != rhs->entry.len )
+       return ( lhs->entry.len > rhs->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[lhs->entry.pos],
+                  &BufferStr[rhs->entry.pos],
+                  lhs->entry.len);
+}
+
+/*
+ * Sort the l parsed entries in a[] and merge duplicates in place.
+ *
+ * buf holds the lexeme bytes that each entry.pos/entry.len refers to.
+ * Position lists (a[i].pos) keep their element count as a uint16 in
+ * slot 0, followed by that many WordEntryPos items; lists of duplicate
+ * lexemes are concatenated and then reduced with uniquePos().
+ *
+ * *outbuflen is in/out:
+ *   - single entry with positions: overwritten with the exact size;
+ *   - single entry without positions: left untouched;
+ *   - several entries: the space needed by every surviving entry is
+ *     ADDED to the incoming value.
+ * Each position list is counted as (count + 1) slots, because the
+ * caller copies the leading count slot together with the positions.
+ *
+ * Returns the number of distinct entries left at the front of a[].
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   res = a;
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
+       }
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;    /* compareentry() reads lexeme bytes through this */
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* a new lexeme starts: finish accounting for the previous one */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               /* +1: the count slot is copied along with the positions */
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* duplicate lexeme carrying positions: merge them into res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* res had no positions yet: adopt the duplicate's list */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+   /* account for the last surviving entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/*
+ * States of the gettoken_tsvector() scanner:
+ *   WAITWORD      - before a token; spaces are skipped
+ *   WAITENDWORD   - inside an unquoted word
+ *   WAITNEXTCHAR  - a backslash was seen; the next character is taken literally
+ *   WAITENDCMPLX  - inside a single-quoted word
+ *   WAITPOSINFO   - just after a closing quote; ':' starts position info
+ *   INPOSINFO     - a digit starting a position number is required
+ *   WAITPOSDELIM  - after a position: ',', a weight letter, more digits, or end
+ */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Double the word buffer when fewer than two free bytes remain, so that
+ * one more character and a terminating '\0' always fit.  Expects a
+ * variable named "state" in scope and re-bases state->curpos onto the
+ * (possibly moved) reallocated buffer.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * gettoken_tsvector - scan the next lexeme from state->prsbuf.
+ *
+ * On success the lexeme is left NUL-terminated in state->word with
+ * state->curpos pointing at the terminator.  When position info
+ * (":1,2A,...") followed the lexeme, state->alen is non-zero and
+ * state->pos holds the list, with the element count stored as a
+ * uint16 in slot 0.  When state->oprisdelim is set, operator
+ * characters (ISOPERATOR) and ':' end a word and no position info is
+ * parsed.
+ *
+ * Returns 1 when a lexeme was read, 0 at end of input.  Malformed
+ * input is reported through elog(ERROR).
+ *
+ * Characters handed to isdigit()/tolower()/isspace() are cast to
+ * unsigned char: passing a negative plain char (any 8-bit text) to the
+ * <ctype.h> functions is undefined behaviour.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;   /* state to resume after an escaped char */
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               /* the delimiter is not consumed; the next call sees it */
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* step over the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               /* slot 0 of state->pos holds the count; grow as needed */
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi() reads the whole number; its other digits are skipped in WAITPOSDELIM */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * tsvector_in - input function of the tsvector type.
+ *
+ * Tokens are read with gettoken_tsvector() into a growable entry array
+ * and a growable byte buffer, duplicates are merged by uniqueentry(),
+ * and the result is packed into one freshly allocated tsvector.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *input = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *entries;
+   WordEntry  *outentries;
+   tsvector   *result;
+   char       *words,      /* raw lexeme bytes collected so far */
+              *wend;       /* write cursor (into words, later into result) */
+   int4        nentries = 0,
+               capacity = 64,
+               wordslen = 256,
+               totalsize,
+               idx;
+
+   state.prsbuf = input;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   entries = (WordEntryIN *) palloc(sizeof(WordEntryIN) * capacity);
+   words = (char *) palloc(wordslen);
+   wend = words;
+
+   while (gettoken_tsvector(&state))
+   {
+       WordEntryIN *slot;
+
+       /* make room for one more entry and for the lexeme's bytes */
+       if (nentries >= capacity)
+       {
+           capacity *= 2;
+           entries = (WordEntryIN *) repalloc((void *) entries, sizeof(WordEntryIN) * capacity);
+       }
+       while ((wend - words) + (state.curpos - state.word) >= wordslen)
+       {
+           int4        used = wend - words;
+
+           wordslen *= 2;
+           words = (char *) repalloc((void *) words, wordslen);
+           wend = words + used;
+       }
+       slot = &entries[nentries];
+
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       slot->entry.len = state.curpos - state.word;
+       if (wend - words > MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       slot->entry.pos = wend - words;
+       memcpy((void *) wend, (void *) state.word, slot->entry.len);
+       wend += slot->entry.len;
+
+       /* take over the position list built by the tokenizer, if any */
+       if ( state.alen ) {
+           slot->entry.haspos = 1;
+           slot->pos = state.pos;
+       } else
+           slot->entry.haspos = 0;
+       nentries++;
+   }
+   pfree(state.word);
+
+   if ( nentries > 0 )
+       nentries = uniqueentry(entries, nentries, words, &wordslen);
+
+   totalsize = CALCDATASIZE(nentries, wordslen);
+   result = (tsvector *) palloc(totalsize);
+   memset(result, 0, totalsize);
+   result->len = totalsize;
+   result->size = nentries;
+
+   wend = STRPTR(result);
+   outentries = ARRPTR(result);
+   for (idx = 0; idx < nentries; idx++)
+   {
+       WordEntryIN *src = &entries[idx];
+
+       /* lexeme bytes first; entry.pos becomes an offset into the result */
+       memcpy((void *) wend, (void *) &words[src->entry.pos], src->entry.len);
+       src->entry.pos = wend - STRPTR(result);
+       wend += SHORTALIGN(src->entry.len);
+
+       /* then the count slot plus the positions, when present */
+       if ( src->entry.haspos ) {
+           size_t      posbytes = (*(uint16*)src->pos + 1) * sizeof(WordEntryPos);
+
+           memcpy( wend, src->pos, posbytes );
+           wend += posbytes;
+           pfree( src->pos );
+       }
+       memcpy( &(outentries[idx]), &(src->entry), sizeof(WordEntry) );
+   }
+   pfree(words);
+   pfree(entries);
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * tsvector_length - SQL length(tsvector): the vector's entry count (->size).
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec;
+   int4        nlexemes;
+
+   vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   nlexemes = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nlexemes);
+}
+
+/*
+ * tsvector_out - output function of the tsvector type.
+ *
+ * Each lexeme is written single-quoted and separated by one space; a
+ * lexeme with positions is followed by ':' and a comma-separated list
+ * of positions, each optionally suffixed with its weight letter
+ * (3 -> A, 2 -> B, 1 -> C, 0 -> nothing).
+ *
+ * Both the quote and the backslash are written with a leading
+ * backslash.  gettoken_tsvector() treats a backslash as an escape
+ * character, so emitting it unescaped would make the text read back
+ * as a different lexeme.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   /* size estimate: quotes, separators, terminator, then per-entry data */
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += ptr[i].len*2 /*for escape */;
+       if ( ptr[i].haspos )
+           lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'' || *curin == '\\')
+           {
+               /* grow by one byte for the escape; re-base the cursor */
+               int4        pos = curout - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort comparator for WORD items.
+ *
+ * Orders by length first, then by byte content, then by position.
+ * Returns 0 when two items agree on all three, so that the ordering is
+ * consistent in both directions as qsort requires (the previous version
+ * returned -1 for such a pair whichever way round it was asked).
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int         res;
+
+   if (wa->len != wb->len)
+       return (wa->len > wb->len) ? 1 : -1;
+
+   /* same length: compare the bytes */
+   res = strncmp(wa->word, wb->word, wb->len);
+   if (res != 0)
+       return res;
+
+   /* same word: order by position, equal positions compare equal */
+   if (wa->pos.pos == wb->pos.pos)
+       return 0;
+   return (wa->pos.pos > wb->pos.pos) ? 1 : -1;
+}
+
+/*
+ * uniqueWORD: sort the array of l WORD items and collapse duplicates of the
+ * same word into one item, in place.
+ *
+ * For every surviving item, pos.apos is set to a palloc'd uint16 array in
+ * which element 0 is the number of positions and elements 1..n are the
+ * positions themselves (clamped by LIMITPOS); alen is the allocated length
+ * of that array.  The word strings of dropped duplicates are pfree'd.
+ *
+ * Returns the number of items left at the front of the array.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   /* a single word needs no sorting, only its position array */
+   if (l == 1) {
+       /* pos is a union-like member: read pos.pos before overwriting with apos */
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   /* res: last unique item written so far; ptr: next item to examine */
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* a new word: start the next unique slot */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* duplicate of *res: keep only its position */
+           pfree(ptr->word);
+           /*
+            * Stop adding once MAXNUMPOS-1 positions are stored or the last
+            * stored position is already the clamp value MAXENTRYPOS-1.
+            */
+           if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               /* double the array when the next slot would not fit */
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
+               res->pos.apos[0]++; 
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * makevalue: build a tsvector from the words collected in prs.
+ *
+ * The words are first sorted and de-duplicated by uniqueWORD().  The result
+ * is laid out as header, WordEntry array, then a data area that holds each
+ * lexeme (padded with SHORTALIGN) followed, when the word has positions, by
+ * a uint16 count and that many WordEntryPos items (all with weight 0).
+ *
+ * Frees every word string, every position array and prs->words itself.
+ * Raises ERROR when a lexeme offset does not fit into WordEntry.pos.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+   /* first pass: size of the data area */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   /* second pass: fill the entries and copy the data */
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /*
+        * WordEntry.pos is a 20-bit field, so the largest offset it can
+        * hold is MAXSTRPOS - 1; an offset equal to MAXSTRPOS would be
+        * silently truncated to 0, hence ">=" rather than ">".
+        */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+
+           ptr->haspos=1;
+           /* position count goes first, right after the padded lexeme */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               /* apos[0] is the count, positions start at apos[1] */
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text): parse the text with the configuration found
+ * for cfg_id and build a tsvector from the words collected.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *in = PG_GETARG_TEXT_P(1);
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     prs;
+   tsvector   *result;
+
+   /* word array initially sized for 32 entries */
+   prs.pos = 0;
+   prs.curwords = 0;
+   prs.lenwords = 32;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+   PG_FREE_IF_COPY(in, 1);
+
+   if (prs.curwords == 0)
+   {
+       /* no words collected: return a header-only tsvector */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+       PG_RETURN_POINTER(result);
+   }
+
+   result = makevalue(&prs);
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg_name, text): translate the configuration name to its id
+ * with name2id_cfg() and hand over to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS)
+{
+   text       *cfg = PG_GETARG_TEXT_P(0);
+   Datum       cfg_id = Int32GetDatum(name2id_cfg(cfg));
+   Datum       res;
+
+   res = DirectFunctionCall3(to_tsvector, cfg_id, PG_GETARG_DATUM(1), (Datum) 0);
+
+   PG_FREE_IF_COPY(cfg, 0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsvector(text): call to_tsvector() with the id returned by
+ * get_currcfg() as the configuration.
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS)
+{
+   Datum       cfg_id = Int32GetDatum(get_currcfg());
+   Datum       res;
+
+   res = DirectFunctionCall3(to_tsvector, cfg_id, PG_GETARG_DATUM(0), (Datum) 0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * findFunc: look up a one-argument function called fname whose argument
+ * type is text.  Returns the oid of the first such candidate, or
+ * InvalidOid when there is none.  The candidate list is freed.
+ */
+static Oid
+findFunc(char *fname) {
+   List *names = makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(names, 1);
+   Oid funcid = InvalidOid;
+
+   freeList(names);
+
+   while (cand != NULL) {
+       FuncCandidateList next = cand->next;
+
+       /* remember only the first match, but keep walking to free the rest */
+       if ( funcid == InvalidOid && cand->args[0] == TEXTOID )
+           funcid = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return funcid;
+}
+
+/*
+ * Trigger
+ *
+ * tsearch2(tsvector_field, arg1, ...): row-level BEFORE INSERT/UPDATE
+ * trigger that fills tsvector_field from the listed arguments.
+ *
+ * Each argument after the first is either the name of a text, varchar or
+ * char column, or, if no column has that name, the name of a one-argument
+ * function taking text (see findFunc).  Once a function has been named, it
+ * is applied to the value of every column processed after it.
+ *
+ * All column texts are parsed with the current configuration into one word
+ * list, from which the tsvector is built and stored in the returned tuple.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   /* validate how the trigger was fired */
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* first argument names the column that receives the tsvector */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /*
+            * Not a column: treat the argument as a function name.  funcoid
+            * is not reset afterwards, so it stays in effect for the
+            * columns that follow.
+            */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           /* non-character columns are skipped with a warning, not an error */
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * If detoasting made no copy, point txt_toasted at the function
+            * result so that the pfree test below leaves it alone.
+            * NOTE(review): when a copy is made, txttmp itself is not freed
+            * here - confirm whether that matters in this memory context.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when detoasting produced a separate copy */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       /* makevalue() also frees prs.words */
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a header-only tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One lexeme of a tsvector: whether position data follows it, its length
+ * in bytes, and its offset inside the data area (see STRPTR).
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One position of a lexeme together with its weight (0..3).
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+/* clamp a position to the largest value that fits into WordEntryPos.pos */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * On-disk/in-memory layout: len (total size in bytes), size (number of
+ * WordEntry items), then the WordEntry array, then the data area.
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Accessor macros.  Every macro argument is parenthesized so that the
+ * macros expand correctly when given an expression rather than a plain
+ * identifier.
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+/* total size for x entries plus lenstr bytes of data */
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+/* start of the WordEntry array */
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+/* start of the data area (lexemes and position data) */
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* size of the data area in bytes */
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* position block of entry e: a uint16 count followed by WordEntryPos items */
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+/* number of positions of entry e, 0 if it has none */
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
+/* first WordEntryPos of entry e (only meaningful when e has positions) */
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/* a WordEntry plus a separately held position array */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/* state carried between calls of gettoken_tsvector() */
+typedef struct
+{
+   char       *prsbuf;
+   char       *word;
+   char       *curpos;
+   int4        len;
+   int4        state;
+   int4        alen;
+   WordEntryPos    *pos;
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>     /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the input that keeps the lexemes but
+ * drops all position data (haspos is cleared on every entry).
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* the new data area holds only the SHORTALIGN-padded lexemes */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       /* offsets are recomputed because the position blocks are gone */
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char"): return a copy of the input in which every
+ * position carries the given weight.
+ *
+ * The weight letter is case-insensitive: a/b/c/d map to 3/2/1/0.  Any
+ * other character raises ERROR.  Entries without position data are left
+ * as they are.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /*
+    * tolower() is only defined for EOF and values representable as
+    * unsigned char; a plain char may be negative, so cast first.
+    */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   /* same layout as the input, only the weight bits change */
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two lexemes that live in (possibly different) data areas:
+ * shorter sorts first, equal lengths are decided by the bytes.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * add_pos: append the positions of entry srcptr (in src) to the position
+ * block of entry destptr (in dest), shifting each by maxpos and clamping
+ * with LIMITPOS.
+ *
+ * If destptr has no positions yet, its count is started at 0.  Copying
+ * stops when the source is exhausted, when the destination holds
+ * MAXNUMPOS positions, or when the last stored position is already the
+ * clamp value MAXENTRYPOS-1.
+ *
+ * Sets destptr->haspos when at least one position was added and returns
+ * the number of positions added.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos )
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight;
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1;
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector): merge two tsvectors into one.
+ *
+ * Both inputs are walked in compareEntry() order.  A lexeme found in only
+ * one input is copied over; a lexeme found in both is stored once with the
+ * positions of the first input followed by those of the second.  Positions
+ * coming from the second input are shifted by the largest position found
+ * in the first input (see add_pos).
+ *
+ * The result is built in a buffer sized for the sum of both inputs and its
+ * size/len fields are corrected at the end.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   /*
+    * Provisional size: STRPTR(out) depends on it, so the data area starts
+    * after room for in1->size + in2->size entries while merging.
+    */
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* in1 positions are copied unchanged, count included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* in2 positions are shifted by maxpos; none may survive */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: store it once */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* count field already copied, so only the items are added */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* whatever is left of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* whatever is left of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Fix up the header: with the real entry count STRPTR(out) may move
+    * down, in which case the data area is moved to follow the entries.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- WARNING: this script is destructive.
+-- It drops every index, trigger and column that uses a type defined
+-- in tsearch2.sql (the types are dropped with CASCADE below).
+-- Everything runs in one transaction (BEGIN ... END).
+
+
+-- GiST operator class
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregates
+DROP AGGREGATE stat(tsvector);
+
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types, together with everything that depends on them
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- remaining functions
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of each token type.  Entry 0 is an empty
+ * placeholder followed by 23 descriptions, giving indexes 1..23.
+ * NOTE(review): the index is presumably the token type number from
+ * deflex.h (1..LASTNUM) - confirm against the callers.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of each token type.  Entry 0 is an empty placeholder
+ * followed by 23 aliases, in the same order as lex_descr[].
+ * NOTE(review): the index is presumably the token type number from
+ * deflex.h (1..LASTNUM) - confirm against the callers.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Remember: LASTNUM is the highest token type number defined below
+ * (currently HTMLENTITY); update it whenever a token type is added.
+ */
+#define LASTNUM        23
+
+/* token type numbers returned by the scanner */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* description and alias tables, defined in deflex.c */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+#include <stdio.h>     /* FILE, used by start_parse_fh() */
+
+/*
+ * Text of the token most recently returned by tsearch2_yylex().
+ * Declared here only; the single definition lives in parser.l, so that
+ * including this header from several files does not define it repeatedly.
+ */
+extern char *token;
+/*
+ * Length in bytes of that token.
+ * NOTE(review): this is still a tentative definition in every file that
+ * includes the header; it should become "extern" as soon as parser.l
+ * carries the one real definition.
+ */
+int            tokenlen;
+/* return the type of the next token (see deflex.h) and set token/tokenlen */
+int            tsearch2_yylex(void);
+/* start scanning the given bytes; the int argument is the byte count */
+void       start_parse_str(char *, int);
+/* start scanning from a file handle; the int argument limits the bytes read, 0 = no limit */
+void       start_parse_fh(FILE *, int);
+/* release the resources of the current scan */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/* Avoid exit() on fatal scanner errors */
+/* (every three-argument fprintf in the generated code becomes ts_error) */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* postgres allocation function */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to token */
+char *s     = NULL;  /* to return WHOLE hyphenated-word */
+
+YY_BUFFER_STATE buf = NULL; /* current scan buffer; released by end_parse() */
+
+int lrlimit = -1;  /* for limiting read from filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* for limiting read from filehandle */
+
+/* redefine macro for read limited length */
+/*
+ * Non-interactive branch: lrlimit == 0 reports end of input; lrlimit > 0
+ * reads at most lrlimit bytes and decreases lrlimit by the amount
+ * requested; a negative lrlimit reads max_size bytes.  Note that the
+ * fread() test runs after both arms of the inner if/else, whatever its
+ * indentation suggests.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/* clean up after a scan: drop the saved word copy and the scan buffer */
+void end_parse() {
+   if (s != NULL) {
+       free(s);
+       s = NULL;
+   }
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/* begin scanning the first limit bytes of str, ending any scan in progress */
+void start_parse_str(char* str, int limit) {
+   if (buf != NULL) {
+       end_parse();
+   }
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * begin scanning from a file handle, ending any scan in progress;
+ * limit is the number of bytes to read, 0 meaning no limit
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if (buf != NULL) {
+       end_parse();
+   }
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser whose pg_ts_parser row
+ * has oid `id`.
+ *
+ * *prs is zeroed first. The row supplies, in column order, the start,
+ * next-token and end functions, the lextype function oid and the
+ * headline function; function lookup data is allocated in
+ * TopMemoryContext. prs->prs_id is set last, so it stays zero if
+ * anything fails on the way.
+ *
+ * Reports an error through ts_error(ERROR, ...) when the query cannot be
+ * prepared or executed, or when no such parser exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       /* prepared on first use and kept in plan_getparser afterwards */
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       HeapTuple row=SPI_tuptable->vals[0];
+       TupleDesc rowdesc=SPI_tuptable->tupdesc;
+       Oid oid=InvalidOid;
+
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       /* the lextype function is kept as a plain oid, not looked up */
+       prs->lextype=DatumGetObjectId( SPI_getbinval(row, rowdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       ts_error(ERROR, "No parser with id %u", id); /* Oid is unsigned */
+   SPI_finish();
+}
+
+/*
+ * Cache of the parser descriptions loaded so far (see findprs) together
+ * with the parser name -> oid map filled by name2id_prs.
+ */
+typedef struct {
+   WParserInfo *last_prs;      /* entry returned by the latest lookup, or NULL */
+   int     len;                /* number of entries of list in use */
+   int     reallen;            /* number of entries list has room for */
+   WParserInfo *list;          /* entries, kept sorted by prs_id */
+   SNMap       name2id_map;    /* parser name -> parser oid */
+} PrsList;
+
+/* the one cache instance, initially empty */
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget every cached parser: release the name map and the entry array,
+ * then zero the whole cache structure.
+ */
+void    
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+   if ( PList.list != NULL )
+       free(PList.list);
+   memset(&PList,0,sizeof(PList));
+}
+
+/*
+ * qsort/bsearch comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is
+ * unsigned, so a difference squeezed into an int has the wrong sign once
+ * two ids are more than 2^31 apart.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   if ( ida > idb )
+       return 1;
+   return 0;
+}
+
+/*
+ * Return the cached description of parser `id`, loading it with
+ * init_prs() on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search of
+ * the sorted cache, then a fresh load. The returned pointer refers to
+ * the cache array, which is re-sorted and may be reallocated whenever a
+ * new parser is loaded.
+ *
+ * Errors (out of memory, unknown parser) are reported through ts_error /
+ * init_prs.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo key;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   key.prs_id=id;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       WParserInfo *found = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( found != NULL ) {
+           PList.last_prs = found;
+           return found;
+       }
+   }
+
+   /*
+    * Not cached. Drop the "last used" shortcut before touching the array:
+    * it must not keep pointing at a slot that is only half filled in if
+    * init_prs reports an error.
+    */
+   PList.last_prs = NULL;
+
+   /* last chance */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+   init_prs(id, &(PList.list[PList.len]));
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+
+   /* qsort changed order, so locate the new entry again */
+   PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return PList.last_prs;
+}
+
+/* saved SPI plan of the name lookup query used by name2id_prs */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * The name map of the cache is consulted first; on a miss the catalog is
+ * queried through SPI and the answer is added to the map. An unknown
+ * name, or a failing query, is reported through ts_error(ERROR, ...).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum argvals[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid prsid=findSNMap_t( &(PList.name2id_map), name );
+
+   /* already known */
+   if ( prsid ) 
+       return prsid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, argvals, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed == 0 )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       prsid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(PList.name2id_map), name, prsid );
+   return prsid;
+}
+
+
+/******sql-level interface******/
+/* per-query state of the token_type* set-returning functions */
+typedef struct {
+   int     cur;        /* index of the next list entry to return */
+   LexDescr    *list;  /* descriptions; an entry with lexid 0 ends the list */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type* functions.
+ *
+ * Calls the lextype function of parser `prsid` to obtain its list of
+ * token descriptions and stores it, with a zeroed cursor, in
+ * funcctx->user_fctx. The result tuples are built from the tuple
+ * descriptor of the relation "tokentype".
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   TypeStorage     *st;
+   WParserInfo *prs = findprs(prsid); 
+
+   /* everything allocated below is made in the multi-call context */
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->cur=0;
+   st->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
+   );
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Produce the next row of a token_type* result.
+ *
+ * Returns the tuple datum built from the current description (id, alias,
+ * description) and advances the cursor; the alias and description
+ * strings are freed once the tuple is built. When the list is absent or
+ * the terminating entry (lexid 0) is reached, the storage is freed and
+ * (Datum)0 is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage     *storage=(TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char    *fields[3];
+   char    idbuf[16];
+   HeapTuple    tuple;
+   Datum   result;
+
+   /* nothing left: release the state and signal the end */
+   if ( !storage->list || !storage->list[storage->cur].lexid ) {
+       if ( storage->list )
+           pfree(storage->list);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   entry = &(storage->list[storage->cur]);
+
+   fields[0]=idbuf;
+   sprintf(idbuf,"%d",entry->lexid);
+   fields[1]=entry->alias;
+   fields[2]=entry->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(fields[1]);
+   pfree(fields[2]);
+   storage->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: one row per token type of the parser whose oid
+ * is argument 0.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   /* the description list is fetched once, on the first call */
+   if (SRF_IS_FIRSTCALL()) { 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* process_call returns (Datum)0 once the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: one row per token type of the parser whose
+ * name is argument 0.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   /* the name is resolved and the list fetched once, on the first call */
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* process_call returns (Datum)0 once the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: one row per token type of the current parser.
+ * When no current parser has been chosen yet, the parser named "default"
+ * becomes the current one.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* fall back to, and remember, the parser called "default" */
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* process_call returns (Datum)0 once the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+/*
+ * Make the parser with the given oid the current one. The parser is
+ * looked up first, so the current parser is only changed if findprs
+ * returns.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        Oid prsid=PG_GETARG_OID(0);
+
+        findprs(prsid);
+        current_parser_id=prsid;
+        PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+/*
+ * Make the parser with the given name the current one: the name is
+ * resolved to an oid and the work is delegated to set_curprs.
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+        Oid prsid=name2id_prs(name);
+
+        DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+/* one token produced by a parser run (see prs_setup_firstcall) */
+typedef struct {
+   int type;           /* token type as returned by the next-token function */
+   char    *lexem;     /* NUL-terminated copy of the token text */
+} LexemEntry;
+
+/* per-query state of the parse* set-returning functions */
+typedef struct {
+   int cur;            /* index of the next entry to return */
+   int len;            /* allocated size while collecting, entry count afterwards */
+   LexemEntry  *list;  /* collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse* functions.
+ *
+ * Runs parser `prsid` over the whole of `txt` (start function, the
+ * next-token function until it returns 0, end function) and stores a
+ * copy of every token with its type in funcctx->user_fctx. Result tuples
+ * are built from the tuple descriptor of the relation "tokenout".
+ *
+ * prsid is declared as Oid, which is what findprs() expects and what
+ * every caller passes.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   /* everything allocated below is made in the multi-call context */
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* the start function gets the text bytes and their length */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* a token type of 0 ends the token stream */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* grow the array geometrically when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* lex is llen bytes long; copy it and add a terminator */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the number of tokens and cur the read cursor */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Produce the next row of a parse* result.
+ *
+ * Returns the tuple datum built from the current token (type, text),
+ * frees the token text and advances the cursor. Once all tokens have
+ * been returned the storage is freed and (Datum)0 is returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *storage=(PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char    *fields[2];
+   char    typebuf[16];
+   HeapTuple    tuple;
+   Datum   result;
+
+   /* nothing left: release the state and signal the end */
+   if ( !( storage->cur < storage->len ) ) {
+       if ( storage->list )
+           pfree(storage->list);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   entry = &(storage->list[storage->cur]);
+
+   fields[0]=typebuf;
+   sprintf(typebuf,"%d",entry->type);
+   fields[1]=entry->lexem;
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(fields[1]);
+   storage->cur++;
+   return result;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose oid is argument 0 and return one row per token.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   /* the whole text is tokenized once, on the first call */
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* prs_process_call returns (Datum)0 once all tokens were returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose name is argument 0 and return one row per token.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   /* name resolution and tokenizing happen once, on the first call */
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* prs_process_call returns (Datum)0 once all tokens were returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: parse the text in argument 0 with the current
+ * parser and return one row per token. When no current parser has been
+ * chosen yet, the parser named "default" becomes the current one.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* fall back to, and remember, the parser called "default" */
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* prs_process_call returns (Datum)0 once all tokens were returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * Build a headline for a document.
+ *
+ * Arguments: 0 - oid of the configuration, 1 - document text,
+ * 2 - query, 3 - optional options text (absent or a NULL pointer means
+ * no options). The document is parsed with hlparsetext(), the headline
+ * function of the configuration's parser marks the words to show, and
+ * genhl() builds the resulting text.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   /* headline_byname/headline_current pass a NULL pointer for "no options" */
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   /* the parser's headline function works on prs in place */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /* NOTE(review): assumes the headline function always sets both strings — confirm for non-default parsers */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+/*
+ * headline() for a configuration given by name (argument 0). The other
+ * arguments are passed through unchanged; a missing options argument is
+ * passed on as a NULL pointer.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg=PG_GETARG_TEXT_P(0);
+   Datum options=PointerGetDatum(NULL);
+   Datum out;
+
+   if ( PG_NARGS()>3 )
+       options=PG_GETARG_DATUM(3);
+
+   out=DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       options
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);   
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+/*
+ * headline() for the current configuration: arguments are the document
+ * text, the query and, optionally, the options text (passed on as a NULL
+ * pointer when missing).
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum options=PointerGetDatum(NULL);
+
+   if ( PG_NARGS()>2 )
+       options=PG_GETARG_DATUM(2);
+
+   PG_RETURN_DATUM(DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       options
+   ));
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* description of one parser, as loaded from pg_ts_parser by init_prs() */
+typedef struct {
+   Oid prs_id;                 /* oid of the pg_ts_parser row */
+   FmgrInfo start_info;        /* prs_start function */
+   FmgrInfo getlexeme_info;    /* prs_nexttoken function */
+   FmgrInfo end_info;          /* prs_end function */
+   FmgrInfo headline_info;     /* prs_headline function */
+   Oid lextype;                /* oid of the prs_lextype function */
+   void *prs;                  /* value returned by the start function */
+} WParserInfo;
+
+/* load the description of parser `id` into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* cached lookup of a parser description by oid */
+WParserInfo* findprs(Oid id);
+/* parser name -> parser oid */
+Oid name2id_prs(text *name);
+/* drop all cached parser descriptions */
+void   reset_prs(void);
+
+
+/* description of one token type, as returned by a parser's lextype function */
+typedef struct {
+   int lexid;          /* token type id; 0 marks the end of an array of these */
+   char    *alias;     /* short name of the token type */
+   char    *descr;     /* description of the token type */
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * lextype function of the default parser: returns a palloc'd array
+ * describing token types 1..LASTNUM, closed by an entry with lexid 0.
+ * Aliases and descriptions are copies of tok_alias[] and lex_descr[].
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *descr=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   LexDescr *out=descr;
+   int id;
+
+   for(id=1;id<=LASTNUM;id++,out++) {
+       out->lexid = id;
+       out->alias = pstrdup(tok_alias[id]);
+       out->descr = pstrdup(lex_descr[id]);
+   }
+
+   /* out now points at the last slot: the terminator */
+   out->lexid=0;
+
+   PG_RETURN_POINTER(descr);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+/*
+ * start function of the default parser: begins scanning the buffer in
+ * argument 0 whose length is argument 1. The returned parser state is
+ * always NULL.
+ */
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char *str=(char*)PG_GETARG_POINTER(0);
+   int len=PG_GETARG_INT32(1);
+
+   start_parse_str( str, len );
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+/*
+ * next-token function of the default parser: runs the scanner once,
+ * stores the token pointer and length through arguments 1 and 2 and
+ * returns the token type. Argument 0 (parser state) is not used.
+ */
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   char **tokout=(char**)PG_GETARG_POINTER(1); 
+   int *lenout=(int*)PG_GETARG_POINTER(2);
+   int  type;
+
+   type=tsearch2_yylex();
+
+   /* token and tokenlen were set by the scanner action that matched */
+   *tokout = token;
+   *lenout = tokenlen;
+   PG_RETURN_INT32(type);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+/*
+ * end function of the default parser: releases the scanner state through
+ * end_parse(). Argument 0 (parser state) is not used.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse();
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token classes used when building a headline. The numbers are token
+ * type ids — presumably the ones defined in wordparser/deflex.h; verify
+ * against that header before changing any of them.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* tokens flagged `replace` when they fall inside the headline */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* tokens that do not count as words when measuring a headline */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* tokens a headline should not end on */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* window of headline words handed to checkcondition_HL through TS_execute */
+typedef struct {
+   HLWORD  *words;     /* first word of the window */
+   int len;            /* number of words in the window */
+} hlCheck;
+
+/*
+ * TS_execute callback: true when query item `val` is attached to at
+ * least one word of the window described by `checkval` (an hlCheck).
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window=(hlCheck*)checkval;
+   int idx=0;
+
+   while ( idx < window->len ) {
+       if ( window->words[idx].item==val )
+           return true;
+       idx++;
+   }
+   return false;
+}
+
+
+/*
+ * Find the next "cover": a range of words [*p, *q] on which the query is
+ * satisfied.
+ *
+ * On entry *p is the word position to start searching from. On success
+ * the function returns true with *p and *q set to the first and last
+ * word of the cover. It returns false when no cover is found.
+ *
+ * NOTE(review): the bounds of the two `j` loops and of the forward `i`
+ * loop were unreadable in this copy and are restored as query->size and
+ * prs->curwords — confirm against the original file.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* q: the furthest "first occurrence at or after pos" of any VAL item */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   /* q was never moved past position 0: treated as "nothing found" */
+   if ( *q==0 )
+       return false;
+
+   /* p: the nearest "last occurrence within [pos, q]" of any VAL item */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* the window does not satisfy the query: retry one word further */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+/*
+ * headline function of the default parser.
+ *
+ * Arguments: 0 - HLPRSTEXT with the parsed document, 1 - options text or
+ * NULL, 2 - the query. Recognized options (case-insensitive keys):
+ * MaxWords, MinWords, ShortWord, StartSel, StopSel.
+ *
+ * Every cover reported by hlCover() is stretched or shortened towards
+ * the MinWords..MaxWords range and the best one is kept; without any
+ * cover the headline is the beginning of the document. The words of the
+ * chosen range are flagged in place (in, selected, skip, replace) and
+ * startsel/stopsel are always left set, defaulting to empty strings.
+ * Returns prs.
+ *
+ * NOTE(review): three loop/condition lines were unreadable in this copy
+ * and are restored below (marked "restored") — confirm against the
+ * original file.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults, overridable through opt */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;     /* first and last word of the best range */
+   int bestlen=-1;            /* query words in the best range, -1: none yet */
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       /* the map ends at the first entry without a key */
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       /* restored condition; beste is only read once a best range exists */
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already finded, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           /* restored loop header */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Keep this range when it is the first one, when it holds more
+        * query words and ends well, or when it ends well while the best
+        * range so far does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   /* no cover at all: take words from the start of the document */
+   if ( bestlen<0 ) {
+       curlen=0;
+       poslen=0;
+       /* restored loop header */
+       for(i=0;i<prs->curwords && curlen<min_words; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen range */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* both strings are always set so that their lengths can be taken */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+               a;\
+       }
+/*
+ * Run statement `a` for every i in 0..SIGLENBIT-1; the caller provides
+ * the variable i.
+ * NOTE(review): the loop bound was unreadable in this copy and is
+ * restored as SIGLENBIT — confirm against the original header.
+ */
+#define LOOPBIT(a) \
+               for(i=0;i<SIGLENBIT;i++) {\
+                               a;\
+               }
+
+/* byte of bit vector x that holds bit number i */
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+/* bit i of the single byte x; i is parenthesized so expressions are safe */
+#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 )
+/* clear / set / read bit number i of bit vector x */
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+/* bit position of value val within a signature, and the macro setting it */
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ *
+ * Variable-length: after the two header words comes either an array of
+ * int4 (ARRKEY), a signature (SIGNKEY) or nothing (ALLISTRUE); see
+ * CALCGTSIZE.
+ */
+typedef struct
+{
+   int4        len;    /* used by ARRNELEM as the total size in bytes, header included */
+   int4        flag;   /* combination of ARRKEY, SIGNKEY, ALLISTRUE */
+   char        data[1];    /* start of the payload */
+}  GISTTYPE;
+
+/* flag bits of GISTTYPE.flag */
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+/* flag tests; x is parenthesized so that pointer expressions are safe */
+#define ISARRKEY(x) ( ((GISTTYPE*)(x))->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)(x))->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)(x))->flag & ALLISTRUE )
+
+/* size of the two int4 header fields */
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+/* total key size: header plus len int4s (array), nothing (all true) or SIGLEN bytes */
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+/* payload viewed as a signature or as an int4 array, and the array length */
+#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)(x)+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)(x))->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif
+                               a;\
+               }
+
+/* byte of bit vector x that holds bit number i */
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+/* bit i of the single byte x; i is parenthesized so expressions are safe */
+#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 )
+/* clear / set / read bit number i of bit vector x */
+#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
+#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+#define abs(a)         ((a) <  (0) ? -(a) : (a))
+#define min(a,b)           ((a) <  (b) ? (a) : (b))
+/* bit position of value val within a signature, and the macro setting it */
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ *
+ * Variable-length: after the two header words comes either an array of
+ * int4 (ARRKEY), a signature (SIGNKEY) or nothing (ALLISTRUE); see
+ * CALCGTSIZE.
+ */
+typedef struct
+{
+   int4        len;    /* used by ARRNELEM as the total size in bytes, header included */
+   int4        flag;   /* combination of ARRKEY, SIGNKEY, ALLISTRUE */
+   char        data[1];    /* start of the payload */
+}  GISTTYPE;
+
+/* flag bits of GISTTYPE.flag */
+#define ARRKEY     0x01
+#define SIGNKEY        0x02
+#define ALLISTRUE  0x04
+
+/* flag tests; x is parenthesized so that pointer expressions are safe */
+#define ISARRKEY(x) ( ((GISTTYPE*)(x))->flag & ARRKEY )
+#define ISSIGNKEY(x)   ( ((GISTTYPE*)(x))->flag & SIGNKEY )
+#define ISALLTRUE(x)   ( ((GISTTYPE*)(x))->flag & ALLISTRUE )
+
+/* size of the two int4 header fields */
+#define GTHDRSIZE  ( sizeof(int4)*2  )
+/* total key size: header plus len int4s (array), nothing (all true) or SIGLEN bytes */
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+/* payload viewed as a signature or as an int4 array, and the array length */
+#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
+#define GETARR(x)  ( (int4*)( (char*)(x)+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)(x))->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif
diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

new file mode 100644 (file)

index 0000000..3cf2cc8
--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.c
@@ -0,0 +1,520 @@
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "spell.h"
+
+#define MAXNORMLEN 56
+
+#define STRNCASECMP(x,y)        (strncasecmp(x,y,strlen(y)))
+
+/* qsort() comparator: orders SPELL entries by their word, using strcmp(). */
+static int
+cmpspell(const void *s1, const void *s2)
+{
+   const SPELL *left = (const SPELL *) s1;
+   const SPELL *right = (const SPELL *) s2;
+
+   return strcmp(left->word, right->word);
+}
+
+/* Lower-case a NUL-terminated string in place, one byte at a time. */
+static void
+strlower(char *str)
+{
+   unsigned char *p;
+
+   for (p = (unsigned char *) str; *p; p++)
+       *p = tolower(*p);
+}
+
+/*
+ * Backward string compare for suffix tree operations: s1 and s2 are
+ * compared character by character starting from their last characters.
+ * If one string runs out first, the shorter string sorts lower.
+ */
+static int
+strbcmp(const char *s1, const char *s2)
+{
+   int         i1 = strlen(s1) - 1;
+   int         i2 = strlen(s2) - 1;
+
+   for (; i1 >= 0 && i2 >= 0; i1--, i2--)
+   {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   /* at least one string is exhausted; the one with characters left is greater */
+   if (i1 != i2)
+       return (i1 < i2) ? -1 : 1;
+
+   return 0;
+}
+/*
+ * Backward compare limited to 'count' trailing characters.  Returns 0 as
+ * soon as 'count' characters have matched; otherwise behaves like a
+ * backward strcmp, with the string that runs out first sorting lower.
+ */
+static int
+strbncmp(const char *s1, const char *s2, size_t count)
+{
+   int         i1 = strlen(s1) - 1;
+   int         i2 = strlen(s2) - 1;
+   int         left = count;
+
+   for (; i1 >= 0 && i2 >= 0 && left > 0; i1--, i2--, left--)
+   {
+       if (s1[i1] != s2[i2])
+           return (s1[i1] < s2[i2]) ? -1 : 1;
+   }
+
+   if (left == 0)
+       return 0;
+   if (i1 != i2)
+       return (i1 < i2) ? -1 : 1;
+   return 0;
+}
+
+/*
+ * qsort() comparator for AFFIX entries: first by type, then by the
+ * replacement string -- forward (strcmp) for prefixes ('p'), backward
+ * (strbcmp) for every other type.
+ */
+static int
+cmpaffix(const void *s1, const void *s2)
+{
+   const AFFIX *a = (const AFFIX *) s1;
+   const AFFIX *b = (const AFFIX *) s2;
+
+   if (a->type != b->type)
+       return (a->type < b->type) ? -1 : 1;
+
+   return (a->type == 'p') ? strcmp(a->repl, b->repl)
+                           : strbcmp(a->repl, b->repl);
+}
+
+/*
+ * Append one dictionary entry (word plus its affix flags) to Conf->Spell.
+ *
+ * The array grows in steps of 1024*20 entries.  The word is duplicated with
+ * strdup(); at most sizeof(flag)-1 flag characters are kept, so that the
+ * stored flag string is always NUL-terminated (FindWord() runs strchr() on it).
+ *
+ * Returns 0; raises elog(ERROR) when memory cannot be obtained.
+ */
+int
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+   SPELL *entry;
+
+   if(Conf->nspell>=Conf->mspell){
+       if(Conf->mspell){
+           Conf->mspell+=1024*20;
+           Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+       }else{
+           Conf->mspell=1024*20;
+           Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+       }
+       if ( Conf->Spell == NULL )
+           elog(ERROR,"No memory for AddSpell");
+   }
+   entry = &Conf->Spell[Conf->nspell];
+   entry->word=strdup(word);
+   if ( !entry->word )
+       elog(ERROR,"No memory for AddSpell");
+   /* strncpy() does not terminate when the source fills the buffer */
+   strncpy(entry->flag,flag,sizeof(entry->flag)-1);
+   entry->flag[sizeof(entry->flag)-1]='\0';
+   Conf->nspell++;
+   return(0);
+}
+
+
+/*
+ * Load a dictionary file into Conf.
+ *
+ * Each line is either "word" or "word/FLAGS".  FLAGS is cut at the first
+ * character that is not an ASCII letter.  The word is lower-cased, every
+ * CR/LF in it is replaced by a terminator, and the pair is handed to
+ * AddSpell().
+ *
+ * Returns 0 on success, 1 if the file cannot be opened.
+ */
+int
+ImportDictionary(IspellDict * Conf,const char *filename){
+   unsigned char line[BUFSIZ];
+   FILE *fp;
+
+   fp = fopen(filename,"r");
+   if (fp == NULL)
+       return(1);
+
+   while (fgets(line,sizeof(line),fp)) {
+       unsigned char *p;
+       const unsigned char *flag = "";
+
+       p = strchr(line,'/');
+       if (p) {
+           /* split "word/FLAGS"; keep only the leading run of letters */
+           *p = 0;
+           p++;
+           flag = p;
+           for (; *p; p++) {
+               int isletter = ((*p>='A')&&(*p<='Z'))||((*p>='a')&&(*p<='z'));
+
+               if (!isletter) {
+                   *p = 0;
+                   break;
+               }
+           }
+       }
+       strlower(line);
+
+       /* replace line terminators by NUL */
+       for (p = line; *p; p++) {
+           if (*p=='\r' || *p=='\n')
+               *p = 0;
+       }
+       AddSpell(Conf,line,flag);
+   }
+   fclose(fp);
+   return(0);
+}
+
+
+/*
+ * Look up 'word' in the sorted Conf->Spell array.
+ *
+ * The search is restricted to the [Left, Right] index range recorded in
+ * Conf->SpellTree for the word's first byte.  An entry matches when its
+ * word equals 'word' and, if affixflag is non-zero, its flag string
+ * contains affixflag.
+ *
+ * Returns a pointer to the matching entry, or NULL.
+ */
+static SPELL * 
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+   int l,c,r,resc,resl,resr, i;
+
+   i = (int)(*word) & 255;
+   l = Conf->SpellTree.Left[i];
+   r = Conf->SpellTree.Right[i];
+   /* -1 means no dictionary word starts with this byte */
+   if (l == -1) return (NULL);
+   while(l<=r){
+       /* probe the middle and both ends of the current range */
+       c = (l + r) >> 1;
+       resc = strcmp(Conf->Spell[c].word, word);
+       if( (resc == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[c]);
+       }
+       resl = strcmp(Conf->Spell[l].word, word);
+       if( (resl == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[l]);
+       }
+       resr = strcmp(Conf->Spell[r].word, word);
+       if( (resr == 0) && 
+           ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
+           return(&Conf->Spell[r]);
+       }
+       /*
+        * Halve the range on the side indicated by the middle probe and
+        * additionally drop the already-probed opposite end.  When the
+        * middle word is equal but its flags did not match, just shrink
+        * the range by one on both sides.
+        */
+       if(resc < 0){
+           l = c + 1;
+           r--;
+       } else if(resc > 0){
+           r = c - 1;
+           l++;
+       } else {
+           l++;
+           r--;
+       }
+   }
+   return(NULL);
+}
+
+/*
+ * Append one affix rule to Conf->Affix.
+ *
+ * flag  - affix flag character the rule belongs to
+ * mask  - regular expression the candidate word must match; it is stored
+ *         anchored with '$' for suffixes (type 's') and '^' otherwise
+ * find  - text substituted into the word when the affix is undone
+ * repl  - affix text as it appears on the inflected word
+ * type  - 's' for a suffix rule, anything else is treated as a prefix
+ *
+ * The array grows in steps of 16 entries.  mask, find and repl are copied
+ * into fixed-size AFFIX members, so a rule that does not fit is skipped
+ * with a warning instead of overflowing them.
+ *
+ * Returns 0 when the rule was stored, 1 when it was rejected as too long;
+ * raises elog(ERROR) when memory cannot be obtained.
+ */
+int
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+   AFFIX *affix;
+
+   /* mask needs one extra byte for its anchor besides the terminator */
+   if (strlen(mask) + 2 > sizeof(affix->mask) ||
+       strlen(find) + 1 > sizeof(affix->find) ||
+       strlen(repl) + 1 > sizeof(affix->repl)) {
+       elog(WARNING,"Affix rule is too long, skipped");
+       return(1);
+   }
+
+   if(Conf->naffixes>=Conf->maffixes){
+       if(Conf->maffixes){
+           Conf->maffixes+=16;
+           Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+       }else{
+           Conf->maffixes=16;
+           Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+       }
+       if ( Conf->Affix == NULL )
+           elog(ERROR,"No memory for AddAffix");
+   }
+
+   affix = &Conf->Affix[Conf->naffixes];
+   if (type=='s') {
+       sprintf(affix->mask,"%s$",mask);
+   } else {
+       sprintf(affix->mask,"^%s",mask);
+   }
+   affix->compile = 1;     /* regex is compiled lazily on first use */
+   affix->flag=flag;
+   affix->type=type;
+
+   strcpy(affix->find,find);
+   strcpy(affix->repl,repl);
+   affix->replen=strlen(repl);
+   Conf->naffixes++;
+   return(0);
+}
+
+/*
+ * Copy 'src' into 'dist', dropping every space, '-' and tab character.
+ * 'dist' is NUL-terminated and returned.
+ */
+static char *
+remove_spaces(char *dist,char *src){
+   char *out = dist;
+   char *in;
+
+   for (in = src; *in; in++) {
+       if (*in==' ' || *in=='-' || *in=='\t')
+           continue;
+       *out++ = *in;
+   }
+   *out = 0;
+   return(dist);
+}
+
+
+/*
+ * Load an affix file into Conf.
+ *
+ * Recognised lines (keywords are matched case-insensitively at line start):
+ *   "suffixes" / "prefixes"  - select the kind of the rules that follow
+ *   "flag X"                 - set the flag for the rules that follow;
+ *                              leading '*' and spaces before X are skipped
+ *   "mask > find, repl"      - a rule; text after '#' is a comment.  With
+ *                              only two fields the second one is used as
+ *                              repl and find is left empty.
+ * Rule lines are lower-cased and have spaces, tabs and '-' removed from
+ * every field before being passed to AddAffix().
+ *
+ * Returns 0 on success, 1 if the file cannot be opened.
+ */
+int
+ImportAffixes(IspellDict * Conf,const char *filename){
+   unsigned char str[BUFSIZ];
+   unsigned char flag=0;
+   unsigned char mask[BUFSIZ]="";
+   unsigned char find[BUFSIZ]="";
+   unsigned char repl[BUFSIZ]="";
+   unsigned char *s;
+   int i;
+   int suffixes=0;
+   int prefixes=0;
+   FILE *affix;
+
+   if(!(affix=fopen(filename,"r")))
+       return(1);
+
+   while(fgets(str,sizeof(str),affix)){
+       if(!STRNCASECMP(str,"suffixes")){
+           suffixes=1;
+           prefixes=0;
+           continue;
+       }
+       if(!STRNCASECMP(str,"prefixes")){
+           suffixes=0;
+           prefixes=1;
+           continue;
+       }
+       if(!STRNCASECMP(str,"flag ")){
+           s=str+5;
+           /*
+            * strchr() also matches the terminating NUL of its first
+            * argument, so stop explicitly at the end of the line.
+            */
+           while(*s && strchr("* ",*s))
+               s++;
+           flag=*s;
+           continue;
+       }
+       if((!suffixes)&&(!prefixes))continue;
+       if((s=strchr(str,'#')))*s=0;
+       if(!*str)continue;
+       strlower(str);
+       strcpy(mask,"");
+       strcpy(find,"");
+       strcpy(repl,"");
+       i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+       /* str is no longer needed and serves as scratch space here */
+       remove_spaces(str,repl);strcpy(repl,str);
+       remove_spaces(str,find);strcpy(find,str);
+       remove_spaces(str,mask);strcpy(mask,str);
+       switch(i){
+           case 3:
+               break;
+           case 2:
+               if(*find != '\0'){
+                   strcpy(repl,find);
+                   strcpy(find,"");
+               }
+               break;
+           default:
+               continue;
+       }
+
+       AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+
+   }
+   fclose(affix);
+
+   return(0);
+}
+
+/*
+ * Sort Conf->Spell by word and rebuild Conf->SpellTree, which records for
+ * every first byte the first (Left) and last (Right) index of the words
+ * starting with it.  Left is -1 for bytes no word starts with.
+ */
+void
+SortDictionary(IspellDict * Conf){
+   int     prev = -1;
+   int     first;
+   size_t  idx;
+
+   qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+   for (idx = 0; idx < 256; idx++)
+       Conf->SpellTree.Left[idx] = -1;
+
+   for (idx = 0; idx < Conf->nspell; idx++) {
+       first = (int)(*(Conf->Spell[idx].word)) & 255;
+       if (first != prev) {
+           /* a new run of words with the same first byte begins here */
+           Conf->SpellTree.Left[first] = idx;
+           prev = first;
+       }
+       Conf->SpellTree.Right[first] = idx;
+   }
+}
+
+/*
+ * Sort Conf->Affix with cmpaffix() and rebuild the prefix and suffix index
+ * trees.  Prefix rules are indexed by the first byte of repl, all other
+ * rules by the last byte of repl (0 when repl is empty).  For each byte
+ * Left/Right hold the first/last affix index of its run, or -1.
+ */
+void
+SortAffixes(IspellDict * Conf) {
+   int     lastPrefix = -1;
+   int     lastSuffix = -1;
+   size_t  idx;
+
+   if (Conf->naffixes > 1)
+       qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
+
+   for (idx = 0; idx < 256; idx++) {
+       Conf->PrefixTree.Left[idx] = Conf->PrefixTree.Right[idx] = -1;
+       Conf->SuffixTree.Left[idx] = Conf->SuffixTree.Right[idx] = -1;
+   }
+
+   for (idx = 0; idx < Conf->naffixes; idx++) {
+       AFFIX       *rule = &(((AFFIX*)Conf->Affix)[idx]);
+       Tree_struct *tree;
+       int         *last;
+       int          key;
+
+       if (rule->type == 'p') {
+           key = (int)(*(rule->repl)) & 255;
+           tree = &Conf->PrefixTree;
+           last = &lastPrefix;
+       } else {
+           key = (rule->replen) ? (int)(rule->repl[rule->replen-1]) & 255 : 0;
+           tree = &Conf->SuffixTree;
+           last = &lastSuffix;
+       }
+
+       if (*last != key) {
+           /* first rule of a new run for this key byte */
+           tree->Left[key] = idx;
+           *last = key;
+       }
+       tree->Right[key] = idx;
+   }
+}
+
+/*
+ * Try to undo one suffix rule on 'word' (of length 'len').
+ *
+ * *res receives the backward comparison of the word's tail with
+ * Affix->repl.  On a tail match the tail is replaced by Affix->find, the
+ * rule's regex (compiled on first use) is run against the result, and the
+ * result is looked up in the dictionary with the rule's flag.
+ *
+ * Returns a pstrdup()'ed normal form, or NULL if the rule does not apply.
+ */
+static char *
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) {
+   regmatch_t subs[2]; /* workaround for apache&linux */
+   char candidate[2*MAXNORMLEN] = "";
+   int err;
+
+   *res = strbncmp(word, Affix->repl, Affix->replen);
+   if (*res != 0)
+       return NULL;
+
+   /* word with its suffix replaced by the rule's "find" text */
+   strcpy(candidate, word);
+   strcpy(candidate+len-Affix->replen, Affix->find);
+
+   if (Affix->compile) {
+       err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+       if (err) {
+           /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+           regfree(&(Affix->reg));
+           return(NULL);
+       }
+       Affix->compile = 0;
+   }
+
+   err = regexec(&(Affix->reg),candidate,1,subs,0);
+   if (err)
+       return NULL;
+   if (!FindWord(Conf, candidate, Affix->flag))
+       return NULL;
+
+   return pstrdup(candidate);
+}
+
+#define NS 1
+#define MAX_NORM 512
+/*
+ * Try to undo one prefix rule on 'word' and append the results to the
+ * NULL-terminated 'forms' array through the write cursor '*cur'.
+ *
+ * Returns the strncmp() result of the word against Affix->repl when the
+ * prefix does not match (the caller uses the sign to steer its search),
+ * and 0 otherwise -- including when the rule's regex fails to compile.
+ *
+ * 'pi' indexes Conf->SuffixTree; 'len' is not used by this function.
+ */
+static int 
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+       char **forms, char ***cur ) {
+  regmatch_t subs[NS*2];
+  char newword[2*MAXNORMLEN] = "";
+  int err, ls, res, lres;
+  size_t newlen;
+  AFFIX *CAffix = Conf->Affix;
+  
+  res = strncmp(word, Affix->repl, Affix->replen);
+  if (res != 0) {
+    return res;
+  }
+  /* newword = rule's "find" text followed by the word without its prefix */
+  strcpy(newword, Affix->find);
+  strcat(newword, word+Affix->replen);
+
+  /* compile the rule's mask on first use */
+  if (Affix->compile) {
+    err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+    if(err){
+      /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+      regfree(&(Affix->reg));
+      return (0);
+    }
+    Affix->compile = 0;
+  }
+  if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+    SPELL * curspell;
+
+    /* the de-prefixed word itself is a dictionary word with this flag */
+    if((curspell=FindWord(Conf, newword, Affix->flag))){
+      if ((*cur - forms) < (MAX_NORM-1)) {
+   **cur =  pstrdup(newword);
+   (*cur)++; **cur = NULL;
+      }
+    } 
+    /*
+     * Additionally try one suffix rule on the de-prefixed word: only the
+     * first rule (Left) recorded in SuffixTree for index pi is tried.
+     */
+    newlen = strlen(newword);
+    ls = Conf->SuffixTree.Left[pi];
+      if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
+   **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
+   if (**cur) {
+     (*cur)++; **cur = NULL;
+   }
+      }
+  }
+  return 0;
+}
+
+
+/*
+ * Find the normal (dictionary) forms of 'word'.
+ *
+ * 'word' is lower-cased in place.  The result is a palloc()'ed,
+ * NULL-terminated array of pstrdup()'ed strings holding at most
+ * MAX_NORM-1 forms: the word itself if it is in the dictionary, plus every
+ * form obtained by undoing prefix and suffix rules.
+ *
+ * Returns NULL when the word is empty, longer than MAXNORMLEN, or has no
+ * normal form.
+ */
+char **
+NormalizeWord(IspellDict * Conf,char *word){
+   size_t len;
+   char **forms;
+   char **cur;
+   AFFIX *Affix;
+   int ri, pi, ipi, lp, rp, cp, ls, rs;
+   int lres, rres, cres = 0;
+   SPELL *spell;
+
+   len=strlen(word);
+   /*
+    * An empty word has no last character: reading word[len-1] would be out
+    * of bounds and a zero 'pi' would keep the "ipi += pi" loop below from
+    * ever terminating.
+    */
+   if (len == 0 || len > MAXNORMLEN)
+       return(NULL);
+
+   strlower(word);
+
+   forms=(char **) palloc(MAX_NORM*sizeof(char *));
+   cur=forms;*cur=NULL;
+
+   ri = (int)(*word) & 255;            /* first byte: prefix tree index */
+   pi = (int)(word[len-1]) & 255;      /* last byte: suffix tree index */
+   Affix=(AFFIX*)Conf->Affix;
+
+   /* Check that the word itself is normal form */
+   if((spell = FindWord(Conf, word, 0))){
+       *cur=pstrdup(word);
+       cur++;*cur=NULL;
+   }
+
+   /* Find all other NORMAL forms of the 'word' */
+
+   /* two passes: suffix index 0 (rules with empty repl), then index pi */
+   for (ipi = 0; ipi <= pi; ipi += pi) {
+
+       /* check prefix */
+       lp = Conf->PrefixTree.Left[ri];
+       rp = Conf->PrefixTree.Right[ri];
+       while (lp >= 0 && lp <= rp) {
+           cp = (lp + rp) >> 1;
+           cres = 0;
+           if ((cur - forms) < (MAX_NORM-1)) {
+               cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+           }
+           if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+           }
+           if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+               rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+           }
+           if (cres < 0) {
+               rp = cp - 1;
+               lp++;
+           } else if (cres > 0) {
+               lp = cp + 1;
+               rp--;
+           } else {
+               lp++;
+               rp--;
+           }
+       }
+
+       /* check suffix: try every rule of the range, from both ends inwards */
+       ls = Conf->SuffixTree.Left[ipi];
+       rs = Conf->SuffixTree.Right[ipi];
+       while (ls >= 0 && ls <= rs) {
+           if (  ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+               *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+               if (*cur) {
+                   cur++; *cur = NULL;
+               }
+           }
+           ls++;
+           rs--;
+       } /* end while */
+
+   } /* for ipi */
+
+   if(cur==forms){
+       pfree(forms);
+       return(NULL);
+   }
+   return(forms);
+}
+
+/*
+ * Release everything owned by an IspellDict: compiled affix regexes, the
+ * strdup()'ed dictionary words, and the Affix and Spell arrays.  The
+ * structure is zeroed afterwards.
+ */
+void
+FreeIspell (IspellDict *Conf) {
+   int i;
+   AFFIX *Affix = (AFFIX *)Conf->Affix;
+
+   /* compile == 0 marks rules whose regex has actually been compiled */
+   for (i = 0; i < Conf->naffixes; i++) {
+       if (Affix[i].compile == 0) {
+           regfree(&(Affix[i].reg));
+       }
+   }
+   /* one word per dictionary entry: iterate over nspell, not naffixes */
+   for (i = 0; i < Conf->nspell; i++) {
+       free( Conf->Spell[i].word );
+   }
+   free(Conf->Affix);
+   free(Conf->Spell);
+   memset( (void*)Conf, 0, sizeof(IspellDict) );
+   return;
+}
diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h

new file mode 100644 (file)

index 0000000..3034ca6
--- /dev/null
+++ b/contrib/tsearch2/ispell/spell.h
@@ -0,0 +1,51 @@
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include 
+#include 
+
+/* One dictionary entry. */
+typedef struct spell_struct {
+        char * word; 
+        /* affix flag characters allowed for the word (filled by AddSpell) */
+        char flag[10];
+} SPELL;
+
+/* One affix rule, as stored by AddAffix(). */
+typedef struct aff_struct {   
+        /* affix flag character this rule belongs to */
+        char flag;
+        /* 's' for suffix rules, 'p' for prefix rules */
+        char type;
+        /* regular expression source, anchored with '^' or '$' */
+        char mask[33];
+        /* text put back into the word when the affix is undone */
+        char find[16];
+        /* affix text as it appears on the inflected word */
+        char repl[16];
+        /* compiled form of mask; valid only when compile == 0 */
+        regex_t reg;
+        /* strlen(repl) */
+        size_t replen;
+        /* 1 while reg still has to be compiled, 0 afterwards */
+        char compile;
+} AFFIX;
+
+/*
+ * Per-byte index into a sorted array: Left[b]/Right[b] are the first/last
+ * array positions of the entries keyed by byte b.  -1 in Left means that
+ * no entry has this key.
+ */
+typedef struct Tree_struct {
+        int Left[256], Right[256];
+} Tree_struct;
+
+/* A loaded ispell dictionary: affix rules, words and their lookup indexes. */
+typedef struct {
+   /* allocated / used number of elements in Affix */
+   int maffixes;
+   int naffixes;
+   AFFIX * Affix;
+
+   /* used / allocated number of elements in Spell */
+   int nspell;
+   int mspell;
+   SPELL   *Spell;
+   /* built by SortDictionary(): Spell ranges by first byte of the word */
+   Tree_struct SpellTree;
+   /* built by SortAffixes(): Affix ranges by first / last byte of repl */
+   Tree_struct PrefixTree;
+   Tree_struct SuffixTree;
+
+} IspellDict;
+
+char ** NormalizeWord(IspellDict * Conf,char *word);
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+int  AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int  AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+void FreeIspell (IspellDict *Conf);
+
+#endif
diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c

new file mode 100644 (file)

index 0000000..e4b0e8b
--- /dev/null
+++ b/contrib/tsearch2/prs_dcfg.c
@@ -0,0 +1,119 @@
+/* 
+ * Simple config parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#define CS_WAITKEY 0
+#define CS_INKEY   1
+#define CS_WAITEQ  2
+#define CS_WAITVALUE   3
+#define CS_INVALUE 4
+#define CS_IN2VALUE    5
+#define CS_WAITDELIM   6
+#define CS_INESC   7
+#define CS_IN2ESC  8
+
+/*
+ * Return a palloc()'ed, NUL-terminated copy of the first 'len' bytes of
+ * 'ptr' with backslash escapes resolved: a backslash is dropped and the
+ * character after it is copied literally.  A lone backslash at the very
+ * end is dropped.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+   char *res=palloc(len+1), *cptr;
+   memcpy(res,ptr,len);
+   res[len]='\0';
+   /* unescape in place: ptr reads, cptr writes (never ahead of ptr) */
+   cptr = ptr = res;
+   while(*ptr) {
+       if ( *ptr == '\\' ) {
+           ptr++;
+           /* trailing backslash: do not step over the terminator */
+           if ( *ptr == '\0' )
+               break;
+       }
+       *cptr=*ptr; ptr++; cptr++;
+   }
+   *cptr='\0';
+
+   return res;
+}
+
+/*
+ * Parse a dictionary option string of the form
+ *     key = value, key = "quoted value", ...
+ * into a palloc()'ed array of Map entries stored in *m.  The array is
+ * zero-filled, so the entry after the last parsed pair has NULL members.
+ *
+ * Keys consist of alphabetic characters.  Values are either double-quoted
+ * or run up to the next whitespace or comma; a backslash escapes the
+ * following character in both forms.  Syntax errors raise elog(ERROR).
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+   Map *mptr;
+   char *ptr=VARDATA(in), *begin=NULL;
+   int num=0;      /* number of commas: upper bound for pairs - 1 */
+   int state=CS_WAITKEY;
+
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if ( *ptr==',' ) num++;
+       ptr++;
+   }
+
+   *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+   memset(mptr, 0, sizeof(Map)*(num+2) );
+   ptr=VARDATA(in);
+   while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+       if (state==CS_WAITKEY) {
+           if (isalpha((unsigned char) *ptr)) {
+               begin=ptr;
+               state=CS_INKEY;
+           } else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if (state==CS_INKEY) {
+           if ( isspace((unsigned char) *ptr) ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITEQ;
+           } else if ( *ptr=='=' ) {
+               mptr->key=nstrdup(begin, ptr-begin);
+               state=CS_WAITVALUE;
+           } else if ( !isalpha((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITEQ ) {
+           if ( *ptr=='=' )
+               state=CS_WAITVALUE;
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state==CS_WAITVALUE ) {
+           if ( *ptr=='"' ) {
+               begin=ptr+1;
+               state=CS_INVALUE;
+           } else if ( !isspace((unsigned char) *ptr) ) {
+               begin=ptr;
+               state=CS_IN2VALUE;
+           }
+       } else if ( state==CS_INVALUE ) {
+           if ( *ptr=='"' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               state=CS_INESC;
+       } else if ( state==CS_IN2VALUE ) {
+           if ( isspace((unsigned char) *ptr) || *ptr==',' ) {
+               mptr->value = nstrdup(begin, ptr-begin);
+               mptr++;
+               state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+           } else if ( *ptr=='\\' )
+               /* must resume in the unquoted-value state after the escape */
+               state=CS_IN2ESC;
+       } else if ( state==CS_WAITDELIM ) {
+           if ( *ptr==',' )
+               state=CS_WAITKEY;
+           else if ( !isspace((unsigned char) *ptr) )
+               elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+       } else if ( state == CS_INESC ) {
+           state=CS_INVALUE;
+       } else if ( state == CS_IN2ESC ) {
+           state=CS_IN2VALUE;
+       } else
+           elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int) (ptr-VARDATA(in)), *ptr);
+       ptr++;
+   }
+
+   /* an unquoted value may be terminated by the end of the input */
+   if (state==CS_IN2VALUE) {
+       mptr->value = nstrdup(begin, ptr-begin);
+       mptr++;
+   } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) )
+       elog(ERROR,"Unexpected end of line");
+}
+
+
diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c

new file mode 100644 (file)

index 0000000..8e714f2
--- /dev/null
+++ b/contrib/tsearch2/query.c
@@ -0,0 +1,862 @@
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery is parsed with the text parser
+ * and morphology is applied as well.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored in a plain (flattened) view: in the array of nodes
+ * the right child is always the next item and the left child is at item+item->left
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include 
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum      tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum      tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum      exectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum      rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum      tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum      to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum      to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum      to_tsquery_current(PG_FUNCTION_ARGS);
+
+#define END            0
+#define ERR            1
+#define VAL            2
+#define OPR            3
+#define OPEN       4
+#define CLOSE      5
+#define VALTRUE        6           /* for stop words */
+#define VALFALSE   7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR   2
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ *
+ * Nodes are created by pushquery() and linked newest-first.
+ */
+typedef struct NODE
+{
+   int2        weight;         /* weight bit mask as produced by get_weight() */
+   int2        type;           /* token type: VAL, OPR, VALTRUE, ... */
+   int4        val;            /* operator character, or crc32 of the operand */
+   int2        distance;       /* offset of the operand text in QPRS_STATE.op */
+   int2        length;         /* length of the operand text */
+   struct NODE *next;          /* previously pushed node */
+}  NODE;
+
+/* Working state of the query parser (see queryin() / makepol()). */
+typedef struct
+{
+   /* current position in the query string */
+   char       *buf;
+   /* WAITOPERAND or WAITOPERATOR */
+   int4        state;
+   /* number of currently open parentheses */
+   int4        count;
+   /* reverse polish notation in list (for temporary usage) */
+   NODE       *str;
+   /* number in str */
+   int4        num;
+
+   /* user-friendly operand */
+   /* lenop: allocated size of op; sumlen: bytes stored, terminators included */
+   int4        lenop;
+   int4        sumlen;
+   /* op: operand buffer; curop: next write position inside op */
+   char       *op;
+   char       *curop;
+
+   /* state for value's parser */
+   TI_IN_STATE valstate;
+
+   /* tscfg */
+   int cfg_id;
+}  QPRS_STATE;
+
+/*
+ * Parse an optional ":ABCD" weight suffix at 'buf'.
+ *
+ * *weight is set to a bit mask (A = 1<<3, B = 1<<2, C = 1<<1, D = 1; letters
+ * are case-insensitive), or to 0 when 'buf' does not start with ':'.
+ * Returns the position of the first character that is not part of the
+ * suffix.
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+   *weight = 0;
+
+   if ( *buf != ':' )
+       return buf;
+
+   buf++;
+   while( *buf ) {
+       /* tolower() requires a value representable as unsigned char */
+       switch(tolower((unsigned char) *buf)) {
+           case 'a': *weight |= 1<<3; break;
+           case 'b': *weight |= 1<<2; break;
+           case 'c': *weight |= 1<<1; break;
+           case 'd': *weight |= 1;    break;
+           default: return buf;
+       }
+       buf++;
+   }
+
+   return buf;
+}
+
+/*
+ * get token from query string
+ *
+ * Returns the token type (VAL, OPR, OPEN, CLOSE, END or ERR) and advances
+ * state->buf.  For OPR, *val is the operator character.  For VAL, *strval
+ * and *lenval describe the operand text held in state->valstate and
+ * *weight receives the result of get_weight().  Blanks between tokens are
+ * skipped.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+   while (1)
+   {
+       switch (state->state)
+       {
+           case WAITOPERAND:
+               if (*(state->buf) == '!')
+               {
+                   /* negation: stay in WAITOPERAND */
+                   (state->buf)++;
+                   *val = (int4) '!';
+                   return OPR;
+               }
+               else if (*(state->buf) == '(')
+               {
+                   state->count++;
+                   (state->buf)++;
+                   return OPEN;
+               } else if ( *(state->buf) == ':' ) {
+                   elog(ERROR,"Error at start of operand"); 
+               } else if (*(state->buf) != ' ') {
+                   /* hand over to the tsvector value parser */
+                   state->valstate.prsbuf = state->buf;
+                   state->state = WAITOPERATOR;
+                   if (gettoken_tsvector(&(state->valstate)))
+                   {
+                       *strval = state->valstate.word;
+                       *lenval = state->valstate.curpos - state->valstate.word;
+                       /* continue after the value and its optional weight */
+                       state->buf = get_weight(state->valstate.prsbuf, weight);
+                       return VAL;
+                   }
+                   else
+                       elog(ERROR, "No operand");
+               }
+               break;
+           case WAITOPERATOR:
+               if (*(state->buf) == '&' || *(state->buf) == '|')
+               {
+                   state->state = WAITOPERAND;
+                   *val = (int4) *(state->buf);
+                   (state->buf)++;
+                   return OPR;
+               }
+               else if (*(state->buf) == ')')
+               {
+                   (state->buf)++;
+                   state->count--;
+                   /* more ')' than '(' seen so far */
+                   return (state->count < 0) ? ERR : CLOSE;
+               }
+               else if (*(state->buf) == '\0')
+                   /* end of input with unclosed '(' is an error */
+                   return (state->count) ? ERR : END;
+               else if (*(state->buf) != ' ')
+                   return ERR;
+               break;
+           default:
+               return ERR;
+               break;
+       }
+       /* only blanks get here: skip them */
+       (state->buf)++;
+   }
+   return END;
+}
+
+/*
+ * Prepend a new node to the parser's list (state->str), which holds the
+ * query in polish notation in reverse order, and bump state->num.
+ * Raises an error if distance or lenval reach MAXSTRPOS / MAXSTRLEN.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   if (distance >= MAXSTRPOS)
+       elog(ERROR, "Value is too big");
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Operand is too long");
+
+   node->weight = weight;
+   node->type = type;
+   node->val = val;
+   node->distance = distance;
+   node->length = lenval;
+
+   /* link in front of the list */
+   node->next = state->str;
+   state->str = node;
+   state->num++;
+}
+
+/*
+ * Operand handler used for tsquery parsing: pushes a node for the operand
+ * (its val is the crc32 of the text) and appends the NUL-terminated
+ * operand text to the state->op buffer, doubling the buffer as needed.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+   int4        used;
+
+   if (lenval >= MAXSTRLEN)
+       elog(ERROR, "Word is too long");
+
+   pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+             state->curop - state->op, lenval, weight);
+
+   /* make room for the operand and its terminator */
+   used = state->curop - state->op;
+   while (used + lenval + 1 >= state->lenop)
+   {
+       state->lenop *= 2;
+       state->op = (char *) repalloc((void *) state->op, state->lenop);
+       state->curop = state->op + used;
+   }
+
+   memcpy((void *) state->curop, (void *) strval, lenval);
+   state->curop[lenval] = '\0';
+   state->curop += lenval + 1;
+   state->sumlen += lenval + 1;
+}
+
+/*
+ * Operand handler used for morphological parsing: the operand text is run
+ * through parsetext_v2() with the parser state's configuration, every
+ * resulting word is pushed as a VAL, and consecutive words are joined
+ * with '&'.  If the parser yields no words, a VALTRUE node is pushed
+ * instead (original note: "for stop words").
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+   int4        count = 0;
+   PRSTEXT         prs;
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+   for(count=0;count<prs.curwords;count++) {
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       /* every word after the first is AND-ed to the preceding ones */
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 )
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens with gettoken_query() and emits nodes through pushval (for
+ * operands) and pushquery() (for operators).  Operators wait on a local
+ * stack of at most STACKDEPTH entries; a parenthesised sub-expression is
+ * handled by a recursive call that returns on the matching CLOSE.
+ *
+ * Returns END on success; ERR tokens raise elog(ERROR, "Syntax error").
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH];
+   int4        lenstack = 0;
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               /* emit pending '&' and '!' operators right after their operand */
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               /* a '|' is emitted at once when other operators are pending */
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               /* parse the parenthesised sub-expression recursively */
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               /* end of this sub-expression: flush the pending operators */
+               while (lenstack)
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   /* end of input: flush the pending operators */
+   while (lenstack)
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+/* Context handed to checkcondition_str() while a query is executed. */
+typedef struct
+{
+   WordEntry  *arrb;           /* first entry of the tsvector's WordEntry array */
+   WordEntry  *arre;           /* one past its last entry */
+   char       *values;         /* STRPTR() of the tsvector: lexeme storage */
+   char       *operand;        /* GETOPERAND() of the query: operand storage */
+}  CHKVAL;
+
+/*
+ * Compare a tsvector entry with a query operand: shorter strings sort
+ * first, strings of equal length are compared with strncmp().
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   return strncmp(&(chkval->values[ptr->pos]),
+                  &(chkval->operand[item->distance]),
+                  item->length);
+}
+
+/*
+ * check weight info
+ *
+ * The position list of entry 'val' is read from chkval->values: a uint16
+ * count stored SHORTALIGN(val->len) bytes after the start of the lexeme,
+ * followed by the WordEntryPos items.  Returns true if at least one
+ * position has a weight whose bit is set in item->weight.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false;
+}
+
+/*
+ * Binary-search the tsvector's sorted WordEntry array for the operand
+ * 'val'.  When it is found and both the operand has a weight restriction
+ * and the entry has positions, the answer is refined by checkclass_str().
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *ctx = (CHKVAL *) checkval;
+   WordEntry  *lo = ctx->arrb;
+   WordEntry  *hi = ctx->arre;
+
+   /* the candidate, if present, always lies in [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = ValCompare(ctx, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else
+       {
+           if (val->weight && mid->haspos)
+               return checkclass_str(ctx, mid, val);
+           return true;
+       }
+   }
+
+   return (false);
+}
+
+/*
+ * Evaluate the query tree rooted at 'curitem'.
+ *
+ * Operands are tested with chkcond(checkval, item).  For operators the
+ * right operand is the next item and the left one is at
+ * curitem + curitem->left.  When 'calcnot' is false, a '!' node is
+ * reported as true without evaluating its operand.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       if (TS_execute(curitem + 1, checkval, calcnot, chkcond))
+           return false;
+       return true;
+   }
+
+   if (curitem->val == (int4) '&')
+   {
+       if (!TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+           return false;
+       return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+   }
+
+   /* |-operator */
+   if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+       return true;
+   return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+}
+
+/*
+ * boolean operations
+ *
+ * rexectsq is exectsq with its two arguments swapped.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(0);
+   Datum       second = PG_GETARG_DATUM(1);
+
+   return DirectFunctionCall2(exectsq, second, first);
+}
+
+/*
+ * Match operator: argument 0 is a tsvector, argument 1 a query.
+ * Returns true if the query, evaluated with TS_execute(), is satisfied by
+ * the tsvector; false if either argument is empty.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector       *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   CHKVAL      chkval;
+   bool        result;
+
+   /* nothing can match an empty vector or an empty query */
+   if (!val->size || !query->size)
+   {
+       PG_FREE_IF_COPY(val, 0);
+       PG_FREE_IF_COPY(query, 1);
+       PG_RETURN_BOOL(false);
+   }
+
+   /* describe the vector's entries and both string areas for the callback */
+   chkval.arrb = ARRPTR(val);
+   chkval.arre = chkval.arrb + val->size;
+   chkval.values = STRPTR(val);
+   chkval.operand = GETOPERAND(query);
+   result = TS_execute(
+                    GETQUERY(query),
+                    &chkval,
+                    true,
+                    checkcondition_str
+       );
+
+   PG_FREE_IF_COPY(val, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ * find left operand in polish notation view
+ *
+ * Recursively walks the subtree that starts at ptr[*pos] and fills in the
+ * 'left' field of every item in it.  On return *pos is the index of the
+ * first item after that subtree.
+ *  - VAL / VALTRUE items get left = 0.
+ *  - '!' gets left = 1; its single operand is the next item.
+ *  - any other operator gets left = the distance from the operator to
+ *    the operand that follows the subtree stored right after it.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+   if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
+   {
+       ptr[*pos].left = 0;
+       (*pos)++;
+   }
+   else if (ptr[*pos].val == (int4) '!')
+   {
+       ptr[*pos].left = 1;
+       (*pos)++;
+       findoprnd(ptr, pos);
+   }
+   else
+   {
+       ITEM       *curitem = &ptr[*pos];
+       int4        tmp = *pos; /* index of this operator */
+
+       (*pos)++;
+       findoprnd(ptr, pos); /* walk the operand stored right after the operator */
+       curitem->left = *pos - tmp; /* distance to the other operand */
+       findoprnd(ptr, pos); /* walk the other operand */
+   }
+}
+
+
+/*
+ * input
+ *
+ * Parse the query text in buf and build the QUERYTYPE value for it.
+ *
+ * buf     - query text handed to the tokenizer through state.buf
+ * pushval - callback passed on to makepol(), which calls it for operands
+ * cfg_id  - stored in the parser state (state.cfg_id)
+ *
+ * Reports ERROR "Empty query" when parsing produced no items.
+ * Returns a freshly palloc'ed QUERYTYPE: header, then state.num ITEMs,
+ * then state.sumlen bytes of operand text.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval); /* NOTE(review): the return value is not checked here */
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str); /* list node is released once its fields are copied */
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance); /* NOTE(review): nothing bounds these writes to sizeof(pbuf) */
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * in without morphology
+ *
+ * Parses the C string in argument 0 with queryin(), using pushval_asis
+ * as the operand callback and 0 as the configuration id.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   char       *src = (char *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *parsed = queryin(src, pushval_asis, 0);
+
+   PG_RETURN_POINTER(parsed);
+}
+
+/*
+ * out function
+ *
+ * INFIX is the working state infix() uses while printing a query.
+ */
+typedef struct
+{
+   ITEM       *curpol; /* next query item to print */
+   char       *buf; /* start of the output buffer */
+   char       *cur; /* current write position inside buf */
+   char       *op; /* base of the operand text; items index it through 'distance' */
+   int4        buflen; /* allocated size of buf, in bytes */
+}  INFIX;
+
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the subtree starting at in->curpol at in->cur, growing in->buf
+ * as needed, and leaves in->curpol just past that subtree.
+ *  - VAL: the operand text in single quotes, with a backslash put in
+ *    front of every single quote, followed by ":" and the letters
+ *    A/B/C/D for weight bits 3/2/1/0 when any weight bit is set.
+ *  - '!': "!" followed by the operand, wrapped in "( ... )" when the
+ *    operand is itself an operator.
+ *  - other operators: the operand stored right after the operator is
+ *    rendered into a scratch buffer and printed last, after the other
+ *    operand and the operator character.  A '|' expression is wrapped
+ *    in "( ... )" unless 'first' is true.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5); /* every char escaped, two quotes, ":ABCD" */
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm; /* scratch state for the operand printed last */
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol; /* continue after the operand just rendered */
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function: render a query as text in infix notation.
+ *
+ * An empty query (size 0) is printed as an empty string; otherwise the
+ * item array is walked by infix().  The detoasted copy of the argument,
+ * if one was made, is released on every return path.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+
+   if (query->size == 0)
+   {
+       char       *b = palloc(1);
+
+       *b = '\0';
+       /* do not leak the detoasted copy on the early return */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(b);
+   }
+   nrm.curpol = GETQUERY(query);
+   nrm.buflen = 32;
+   nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+   *(nrm.cur) = '\0';
+   nrm.op = GETOPERAND(query);
+   infix(&nrm, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(nrm.buf);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * Returns, as text, what is left of the query after clean_NOT_v2() has
+ * processed its items: an empty text for an empty query, "T" when
+ * clean_NOT_v2() returns NULL, otherwise the infix rendering of the
+ * cleaned items.  The detoasted argument copy and the scratch print
+ * buffer are released on every path.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+   text       *res;
+   ITEM       *q;
+   int4        len;
+
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       /* do not leak the detoasted copy on the early return */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(res);
+   }
+
+   q = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (!q)
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+   else
+   {
+       int4        outlen;
+
+       nrm.curpol = q;
+       nrm.buflen = 32;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+       *(nrm.cur) = '\0';
+       nrm.op = GETOPERAND(query);
+       infix(&nrm, true);
+
+       /* text payload is counted, not NUL-terminated: copy exactly outlen bytes */
+       outlen = nrm.cur - nrm.buf;
+       res = (text *) palloc(outlen + VARHDRSZ);
+       VARATT_SIZEP(res) = outlen + VARHDRSZ;
+       memcpy(VARDATA(res), nrm.buf, outlen);
+       pfree(nrm.buf);
+       pfree(q);
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * Build a query from text using the configuration given by id.
+ *
+ * Argument 0 is the configuration id, argument 1 the query text.  The
+ * text is parsed by queryin() with pushval_morph as the operand callback
+ * and the resulting items are passed through clean_fakeval_v2().  When
+ * that returns NULL an empty query (size 0, header-only length) is
+ * returned; otherwise the cleaned items are copied back over the start
+ * of the item array.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text       *in = PG_GETARG_TEXT_P(1);
+   char       *str = text2char(in);
+   QUERYTYPE  *query;
+   ITEM       *cleaned;
+   int4        nitems;
+
+   PG_FREE_IF_COPY(in,1);
+
+   query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+   cleaned = clean_fakeval_v2(GETQUERY(query), &nitems);
+   if (cleaned)
+   {
+       memcpy((void *) GETQUERY(query), (void *) cleaned, nitems * sizeof(ITEM));
+       pfree(cleaned);
+   }
+   else
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+   }
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * to_tsquery variant that takes the configuration by name.
+ *
+ * Argument 0 is the configuration name, argument 1 the query text.  The
+ * name is resolved with name2id_cfg() and the work is delegated to
+ * to_tsquery.
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+
+   /*
+    * name was fetched from argument 0, so that is the slot it must be
+    * compared against; comparing with slot 1 would pfree it even when
+    * it is not a detoasted copy.
+    */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsquery variant that uses the configuration id returned by
+ * get_currcfg(); argument 0 is the query text.
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum       cfg = Int32GetDatum( get_currcfg() );
+   Datum       res = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));
+
+   PG_RETURN_DATUM(res);
+}
+
+
+       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+       pfree( prs.words[count].word );
+       if (count)
+           pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+   }   
+   pfree(prs.words);
+
+   /* XXX */
+   if ( prs.curwords==0 ) 
+       pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens with gettoken_query() until END or a CLOSE token and
+ * emits items through pushval (for VAL tokens) and pushquery (for
+ * operators).  Operators are either emitted at once or parked on a local
+ * stack of at most STACKDEPTH entries; an OPEN token is handled by a
+ * recursive call.
+ *
+ * Returns END when the input or the current parenthesized group is
+ * finished.  A bad token is reported with elog(ERROR, "Syntax error").
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+   int4        val,
+               type;
+   int4        lenval;
+   char       *strval;
+   int4        stack[STACKDEPTH]; /* operators waiting for their operand */
+   int4        lenstack = 0; /* number of used entries in stack */
+   int2        weight;
+
+   while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+   {
+       switch (type)
+       {
+           case VAL:
+               (*pushval) (state, VAL, strval, lenval, weight);
+               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                   stack[lenstack - 1] == (int4) '!'))
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case OPR:
+               if (lenstack && val == (int4) '|')
+                   pushquery(state, OPR, val, 0, 0, 0);
+               else
+               {
+                   if (lenstack == STACKDEPTH)
+                       elog(ERROR, "Stack too short");
+                   stack[lenstack] = val;
+                   lenstack++;
+               }
+               break;
+           case OPEN:
+               if (makepol(state, pushval) == ERR)
+                   return ERR;
+               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+                                stack[lenstack - 1] == (int4) '!')) /* NOTE(review): pops one operator here, while the VAL case loops -- confirm this is intended */
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               }
+               break;
+           case CLOSE:
+               while (lenstack) /* flush everything still parked before leaving the group */
+               {
+                   lenstack--;
+                   pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+               };
+               return END;
+               break;
+           case ERR:
+           default:
+               elog(ERROR, "Syntax error");
+               return ERR;
+
+       }
+   }
+   while (lenstack) /* end of input: flush the remaining operators */
+   {
+       lenstack--;
+       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+   };
+   return END;
+}
+
+typedef struct
+{
+   WordEntry  *arrb; /* first entry of the vector's WordEntry array */
+   WordEntry  *arre; /* one past the last entry (arrb + size) */
+   char       *values; /* base of the vector's string data; WordEntry.pos indexes it */
+   char       *operand; /* base of the query's operand text; ITEM.distance indexes it */
+}  CHKVAL;
+
+/*
+ * compare 2 string values
+ *
+ * Orders the vector entry ptr against the query operand item: strings
+ * of different length are ordered by length alone, equal-length strings
+ * by strncmp over that length.  Returns a negative, zero or positive
+ * value.
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+   char       *entrystr;
+   char       *querystr;
+
+   if (ptr->len != item->length)
+       return (ptr->len > item->length) ? 1 : -1;
+
+   entrystr = &(chkval->values[ptr->pos]);
+   querystr = &(chkval->operand[item->distance]);
+   return strncmp(entrystr, querystr, item->length);
+}
+
+/*
+ * check weight info
+ *
+ * Returns true when at least one position stored for the vector entry
+ * val carries a weight whose bit is set in item->weight.
+ *
+ * The position data is read from the vector's string area, right after
+ * the SHORTALIGN'ed word: a uint16 count followed by that many
+ * WordEntryPos elements.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+   WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+   uint16  len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+   while (len--) {
+       /* test the bit that corresponds to this position's weight class */
+       if ( item->weight & ( 1<<ptr->weight ) )
+           return true;
+       ptr++;
+   }
+   return false;
+}
+
+/*
+ * is there value 'val' in array or not ?
+ *
+ * Binary search for the operand val among the entries between arrb and
+ * arre of the CHKVAL passed in checkval, ordered by ValCompare.  When a
+ * matching entry is found and both the operand has weight bits and the
+ * entry has positions, the answer is given by checkclass_str; a match
+ * without that extra condition is true.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+   CHKVAL     *chk = (CHKVAL *) checkval;
+   WordEntry  *lo = chk->arrb;
+   WordEntry  *hi = chk->arre;
+
+   /* search the half-open range [lo, hi) */
+   while (lo < hi)
+   {
+       WordEntry  *mid = lo + (hi - lo) / 2;
+       int         cmp = ValCompare(chk, mid, val);
+
+       if (cmp < 0)
+           lo = mid + 1;
+       else if (cmp > 0)
+           hi = mid;
+       else if (val->weight && mid->haspos)
+           return checkclass_str(chk, mid, val);
+       else
+           return true;
+   }
+
+   return (false);
+}
+
+/*
+ * check for boolean condition
+ *
+ * Evaluates the query subtree rooted at curitem.
+ *  - A VAL item is decided by the chkcond callback, which receives
+ *    checkval untouched.
+ *  - '!' negates the item stored right after it; when calcnot is false
+ *    the negation is not computed and the result is simply true.
+ *  - '&' and any other operator (i.e. '|') combine the operand found at
+ *    curitem + curitem->left with the one at curitem + 1.  The former is
+ *    always evaluated first, the latter only when it can still change
+ *    the result.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+   bool        leftres;
+
+   /* leaf: delegate to the caller-supplied predicate */
+   if (curitem->type == VAL)
+       return (*chkcond) (checkval, curitem);
+
+   /* negation has a single operand, stored right after the operator */
+   if (curitem->val == (int4) '!')
+   {
+       if (!calcnot)
+           return true;
+       if (TS_execute(curitem + 1, checkval, calcnot, chkcond))
+           return false;
+       return true;
+   }
+
+   /* binary operators: the operand reached through 'left' goes first */
+   leftres = TS_execute(curitem + curitem->left, checkval, calcnot, chkcond);
+
+   if (curitem->val == (int4) '&')
+   {
+       if (!leftres)
+           return false;
+   }
+   else if (leftres)           /* |-operator */
+       return true;
+
+   return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+}
+
+/*
+ * boolean operations
+ *
+ * rexectsq takes the same two arguments as exectsq but in the opposite
+ * order, and simply forwards them, swapped, to exectsq.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+   Datum       first = PG_GETARG_DATUM(0);
+   Datum       second = PG_GETARG_DATUM(1);
+
+   PG_RETURN_DATUM(DirectFunctionCall2(exectsq, second, first));
+}
+
+/*
+ * Match a tsvector (argument 0) against a query (argument 1).
+ *
+ * An empty vector or an empty query never matches.  Otherwise the query
+ * items are evaluated by TS_execute with NOT operators enabled, using
+ * checkcondition_str as the per-operand predicate.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   QUERYTYPE  *qry = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+   bool        matched = false;
+
+   if (vec->size && qry->size)
+   {
+       CHKVAL      ctx;
+
+       ctx.arrb = ARRPTR(vec);
+       ctx.arre = ctx.arrb + vec->size;
+       ctx.values = STRPTR(vec);
+       ctx.operand = GETOPERAND(qry);
+       matched = TS_execute(GETQUERY(qry), &ctx, true, checkcondition_str);
+   }
+
+   /* release the detoasted copies, if any were made */
+   PG_FREE_IF_COPY(vec, 0);
+   PG_FREE_IF_COPY(qry, 1);
+   PG_RETURN_BOOL(matched);
+}
+
+/*
+ * find left operand in polish notation view
+ *
+ * Recursively walks the subtree that starts at ptr[*pos] and fills in the
+ * 'left' field of every item in it.  On return *pos is the index of the
+ * first item after that subtree.
+ *  - VAL / VALTRUE items get left = 0.
+ *  - '!' gets left = 1; its single operand is the next item.
+ *  - any other operator gets left = the distance from the operator to
+ *    the operand that follows the subtree stored right after it.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+#ifdef BS_DEBUG
+   elog(DEBUG3, (ptr[*pos].type == OPR) ?
+        "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
+#endif
+   if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
+   {
+       ptr[*pos].left = 0;
+       (*pos)++;
+   }
+   else if (ptr[*pos].val == (int4) '!')
+   {
+       ptr[*pos].left = 1;
+       (*pos)++;
+       findoprnd(ptr, pos);
+   }
+   else
+   {
+       ITEM       *curitem = &ptr[*pos];
+       int4        tmp = *pos; /* index of this operator */
+
+       (*pos)++;
+       findoprnd(ptr, pos); /* walk the operand stored right after the operator */
+       curitem->left = *pos - tmp; /* distance to the other operand */
+       findoprnd(ptr, pos); /* walk the other operand */
+   }
+}
+
+
+/*
+ * input
+ *
+ * Parse the query text in buf and build the QUERYTYPE value for it.
+ *
+ * buf     - query text handed to the tokenizer through state.buf
+ * pushval - callback passed on to makepol(), which calls it for operands
+ * cfg_id  - stored in the parser state (state.cfg_id)
+ *
+ * Reports ERROR "Empty query" when parsing produced no items.
+ * Returns a freshly palloc'ed QUERYTYPE: header, then state.num ITEMs,
+ * then state.sumlen bytes of operand text.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+   QPRS_STATE  state;
+   int4        i;
+   QUERYTYPE  *query;
+   int4        commonlen;
+   ITEM       *ptr;
+   NODE       *tmp;
+   int4        pos = 0;
+
+#ifdef BS_DEBUG
+   char        pbuf[16384],
+              *cur;
+#endif
+
+   /* init state */
+   state.buf = buf;
+   state.state = WAITOPERAND;
+   state.count = 0;
+   state.num = 0;
+   state.str = NULL;
+   state.cfg_id=cfg_id;
+
+   /* init value parser's state */
+   state.valstate.oprisdelim = true;
+   state.valstate.len = 32;
+   state.valstate.word = (char *) palloc(state.valstate.len);
+
+   /* init list of operand */
+   state.sumlen = 0;
+   state.lenop = 64;
+   state.curop = state.op = (char *) palloc(state.lenop);
+   *(state.curop) = '\0';
+
+   /* parse query & make polish notation (postfix, but in reverse order) */
+   makepol(&state, pushval); /* NOTE(review): the return value is not checked here */
+   pfree(state.valstate.word);
+   if (!state.num)
+       elog(ERROR, "Empty query");
+
+   /* make finish struct */
+   commonlen = COMPUTESIZE(state.num, state.sumlen);
+   query = (QUERYTYPE *) palloc(commonlen);
+   query->len = commonlen;
+   query->size = state.num;
+   ptr = GETQUERY(query);
+
+   /* set item in polish notation */
+   for (i = 0; i < state.num; i++)
+   {
+       ptr[i].weight = state.str->weight;
+       ptr[i].type = state.str->type;
+       ptr[i].val = state.str->val;
+       ptr[i].distance = state.str->distance;
+       ptr[i].length = state.str->length;
+       tmp = state.str->next;
+       pfree(state.str); /* list node is released once its fields are copied */
+       state.str = tmp;
+   }
+
+   /* set user friendly-operand view */
+   memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+   pfree(state.op);
+
+   /* set left operand's position for every operator */
+   pos = 0;
+   findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+   cur = pbuf;
+   *cur = '\0';
+   for (i = 0; i < query->size; i++)
+   {
+       if (ptr[i].type == OPR)
+           sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+       else
+           sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance); /* NOTE(review): nothing bounds these writes to sizeof(pbuf) */
+       cur = strchr(cur, '\0');
+   }
+   elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+   return query;
+}
+
+/*
+ * in without morphology
+ *
+ * Parses the C string in argument 0 with queryin(), using pushval_asis
+ * as the operand callback and 0 as the configuration id.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+   char       *src = (char *) PG_GETARG_POINTER(0);
+   QUERYTYPE  *parsed = queryin(src, pushval_asis, 0);
+
+   PG_RETURN_POINTER(parsed);
+}
+
+/*
+ * out function
+ *
+ * INFIX is the working state infix() uses while printing a query.
+ */
+typedef struct
+{
+   ITEM       *curpol; /* next query item to print */
+   char       *buf; /* start of the output buffer */
+   char       *cur; /* current write position inside buf */
+   char       *op; /* base of the operand text; items index it through 'distance' */
+   int4        buflen; /* allocated size of buf, in bytes */
+}  INFIX;
+
+#define RESIZEBUF(inf,addsize) \
+while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
+{ \
+   int4 len = inf->cur - inf->buf; \
+   inf->buflen *= 2; \
+   inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
+   inf->cur = inf->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the subtree starting at in->curpol at in->cur, growing in->buf
+ * as needed, and leaves in->curpol just past that subtree.
+ *  - VAL: the operand text in single quotes, with a backslash put in
+ *    front of every single quote, followed by ":" and the letters
+ *    A/B/C/D for weight bits 3/2/1/0 when any weight bit is set.
+ *  - '!': "!" followed by the operand, wrapped in "( ... )" when the
+ *    operand is itself an operator.
+ *  - other operators: the operand stored right after the operator is
+ *    rendered into a scratch buffer and printed last, after the other
+ *    operand and the operator character.  A '|' expression is wrapped
+ *    in "( ... )" unless 'first' is true.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+   if (in->curpol->type == VAL)
+   {
+       char       *op = in->op + in->curpol->distance;
+
+       RESIZEBUF(in, in->curpol->length * 2 + 2 + 5); /* every char escaped, two quotes, ":ABCD" */
+       *(in->cur) = '\'';
+       in->cur++;
+       while (*op)
+       {
+           if (*op == '\'')
+           {
+               *(in->cur) = '\\';
+               in->cur++;
+           }
+           *(in->cur) = *op;
+           op++;
+           in->cur++;
+       }
+       *(in->cur) = '\'';
+       in->cur++;
+       if ( in->curpol->weight ) {
+           *(in->cur) = ':'; in->cur++;
+           if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+           if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+           if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+           if ( in->curpol->weight & 1 )      { *(in->cur) = 'D'; in->cur++; }
+       }
+       *(in->cur) = '\0';
+       in->curpol++;
+   }
+   else if (in->curpol->val == (int4) '!')
+   {
+       bool        isopr = false;
+
+       RESIZEBUF(in, 1);
+       *(in->cur) = '!';
+       in->cur++;
+       *(in->cur) = '\0';
+       in->curpol++;
+       if (in->curpol->type == OPR)
+       {
+           isopr = true;
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+       infix(in, isopr);
+       if (isopr)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+   else
+   {
+       int4        op = in->curpol->val;
+       INFIX       nrm; /* scratch state for the operand printed last */
+
+       in->curpol++;
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, "( ");
+           in->cur = strchr(in->cur, '\0');
+       }
+
+       nrm.curpol = in->curpol;
+       nrm.op = in->op;
+       nrm.buflen = 16;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+       /* get right operand */
+       infix(&nrm, false);
+
+       /* get & print left operand */
+       in->curpol = nrm.curpol; /* continue after the operand just rendered */
+       infix(in, false);
+
+       /* print operator & right operand */
+       RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+       sprintf(in->cur, " %c %s", op, nrm.buf);
+       in->cur = strchr(in->cur, '\0');
+       pfree(nrm.buf);
+
+       if (op == (int4) '|' && !first)
+       {
+           RESIZEBUF(in, 2);
+           sprintf(in->cur, " )");
+           in->cur = strchr(in->cur, '\0');
+       }
+   }
+}
+
+
+/*
+ * Output function: render a query as text in infix notation.
+ *
+ * An empty query (size 0) is printed as an empty string; otherwise the
+ * item array is walked by infix().  The detoasted copy of the argument,
+ * if one was made, is released on every return path.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+
+   if (query->size == 0)
+   {
+       char       *b = palloc(1);
+
+       *b = '\0';
+       /* do not leak the detoasted copy on the early return */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(b);
+   }
+   nrm.curpol = GETQUERY(query);
+   nrm.buflen = 32;
+   nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+   *(nrm.cur) = '\0';
+   nrm.op = GETOPERAND(query);
+   infix(&nrm, true);
+
+   PG_FREE_IF_COPY(query, 0);
+   PG_RETURN_POINTER(nrm.buf);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * Returns, as text, what is left of the query after clean_NOT_v2() has
+ * processed its items: an empty text for an empty query, "T" when
+ * clean_NOT_v2() returns NULL, otherwise the infix rendering of the
+ * cleaned items.  The detoasted argument copy and the scratch print
+ * buffer are released on every path.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+   INFIX       nrm;
+   text       *res;
+   ITEM       *q;
+   int4        len;
+
+
+   if (query->size == 0)
+   {
+       res = (text *) palloc(VARHDRSZ);
+       VARATT_SIZEP(res) = VARHDRSZ;
+       /* do not leak the detoasted copy on the early return */
+       PG_FREE_IF_COPY(query, 0);
+       PG_RETURN_POINTER(res);
+   }
+
+   q = clean_NOT_v2(GETQUERY(query), &len);
+
+   if (!q)
+   {
+       res = (text *) palloc(1 + VARHDRSZ);
+       VARATT_SIZEP(res) = 1 + VARHDRSZ;
+       *((char *) VARDATA(res)) = 'T';
+   }
+   else
+   {
+       int4        outlen;
+
+       nrm.curpol = q;
+       nrm.buflen = 32;
+       nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+       *(nrm.cur) = '\0';
+       nrm.op = GETOPERAND(query);
+       infix(&nrm, true);
+
+       /* text payload is counted, not NUL-terminated: copy exactly outlen bytes */
+       outlen = nrm.cur - nrm.buf;
+       res = (text *) palloc(outlen + VARHDRSZ);
+       VARATT_SIZEP(res) = outlen + VARHDRSZ;
+       memcpy(VARDATA(res), nrm.buf, outlen);
+       pfree(nrm.buf);
+       pfree(q);
+   }
+
+   PG_FREE_IF_COPY(query, 0);
+
+   PG_RETURN_POINTER(res);
+}
+
+/*
+ * Build a query from text using the configuration given by id.
+ *
+ * Argument 0 is the configuration id, argument 1 the query text.  The
+ * text is parsed by queryin() with pushval_morph as the operand callback
+ * and the resulting items are passed through clean_fakeval_v2().  When
+ * that returns NULL an empty query (size 0, header-only length) is
+ * returned; otherwise the cleaned items are copied back over the start
+ * of the item array.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+   text       *in = PG_GETARG_TEXT_P(1);
+   char       *str = text2char(in);
+   QUERYTYPE  *query;
+   ITEM       *cleaned;
+   int4        nitems;
+
+   PG_FREE_IF_COPY(in,1);
+
+   query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+   cleaned = clean_fakeval_v2(GETQUERY(query), &nitems);
+   if (cleaned)
+   {
+       memcpy((void *) GETQUERY(query), (void *) cleaned, nitems * sizeof(ITEM));
+       pfree(cleaned);
+   }
+   else
+   {
+       query->len = HDRSIZEQT;
+       query->size = 0;
+   }
+   PG_RETURN_POINTER(query);
+}
+
+/*
+ * to_tsquery variant that takes the configuration by name.
+ *
+ * Argument 0 is the configuration name, argument 1 the query text.  The
+ * name is resolved with name2id_cfg() and the work is delegated to
+ * to_tsquery.
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Datum res= DirectFunctionCall2(
+       to_tsquery,
+       Int32GetDatum( name2id_cfg(name) ),
+       PG_GETARG_DATUM(1)
+   );
+
+   /*
+    * name was fetched from argument 0, so that is the slot it must be
+    * compared against; comparing with slot 1 would pfree it even when
+    * it is not a detoasted copy.
+    */
+   PG_FREE_IF_COPY(name,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsquery variant that uses the configuration id returned by
+ * get_currcfg(); argument 0 is the query text.
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+   Datum       cfg = Int32GetDatum( get_currcfg() );
+   Datum       res = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));
+
+   PG_RETURN_DATUM(res);
+}
+
+
diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h

new file mode 100644 (file)

index 0000000..c0715a2
--- /dev/null
+++ b/contrib/tsearch2/query.h
@@ -0,0 +1,55 @@
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * item in polish notation with back link
+ * to left operand
+ */
+typedef struct ITEM
+{
+   int8        type;       /* item kind, one of the VAL / OPR / ... codes below */
+   int8        weight;     /* weight-class bit mask; bits 3..0 are printed as A..D */
+   int2        left;       /* for operators: distance to the operand not stored right after */
+   int4        val;        /* for OPR items: the operator character ('!', '&', '|') */
+   /* user-friendly value, must correlate with WordEntry */
+   uint32
+       unused:1,
+       length:11,          /* length of the operand text */
+       distance:20;        /* offset of the operand text from GETOPERAND() */
+}  ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ */
+typedef struct
+{
+   int4        len;        /* total size of the value in bytes */
+   int4        size;       /* number of ITEMs */
+   char        data[1];
+}  QUERYTYPE;
+
+/*
+ * Layout helpers.  Every macro argument and every expansion is fully
+ * parenthesized so the macros can be combined with indexing, member
+ * access or arithmetic without changing their meaning.
+ */
+#define HDRSIZEQT  ( 2*sizeof(int4) )
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+/* token / item type codes */
+#define END                0
+#define ERR                1
+#define VAL                2
+#define OPR                3
+#define OPEN           4
+#define CLOSE          5
+#define VALTRUE            6       /* for stop words */
+#define VALFALSE       7
+
+bool TS_execute(ITEM * curitem, void *checkval,
+       bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif
diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c

new file mode 100644 (file)

index 0000000..b73f400
--- /dev/null
+++ b/contrib/tsearch2/rank.c
@@ -0,0 +1,591 @@
+/*
+ * Relevation
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+#include <math.h>
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(rank);
+Datum      rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum      rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum      rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum      rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum      get_covers(PG_FUNCTION_ARGS);
+
+/*
+ * Default weight table, indexed by WordEntryPos.weight.  rank_def() passes it
+ * as is; rank() falls back to it for negative entries of the user's array.
+ */
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+/* Weight of one position entry; needs a float array named "w" in the enclosing scope */
+#define wpos(wep)  ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+/* Normalization method used when the optional argument is omitted (0 leaves the rank as is) */
+#define DEF_NORM_METHOD    0
+
+/*
+ * Collocation weight for two words that are w positions apart.
+ * Distances above 100 get a fixed, practically zero weight; otherwise
+ * the weight falls off with the distance.
+ */
+static float4 word_distance ( int4 w ) {
+   double  exponent;
+
+   if ( w>100 )
+       return 1e-30;
+
+   exponent = ((float4)w)/1.5-2;
+   return 1.0/(1.005+0.05*exp(exponent));
+}
+
+/*
+ * Length of document t for normalization purposes: the number of stored
+ * positions, where an entry without any position data counts as one.
+ */
+static int
+cnt_length( tsvector *t ) {
+   WordEntry   *entry;
+   WordEntry   *stop=(WordEntry*)STRPTR(t);
+   int total = 0;
+
+   for(entry=ARRPTR(t); entry < stop; entry++) {
+       int npos = POSDATALEN(t, entry);
+
+       total += ( npos == 0 ) ? 1 : npos;
+   }
+
+   return total;
+}
+
+/*
+ * Orders a document word (ptr, text in eval) against a query operand
+ * (item, text in qval): shorter strings sort first, equal-length strings
+ * are compared with strncmp.  Returns <0, 0 or >0.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+        if (ptr->len != item->length)
+                return (ptr->len > item->length) ? 1 : -1;
+
+        return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary search of the word array of t for the query operand item of q.
+ * Returns the matching WordEntry, or NULL when the word is not in t.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+        WordEntry  *lo = ARRPTR(t);
+        WordEntry  *hi = (WordEntry*)STRPTR(t);
+
+        /* the search window is [lo, hi) */
+        while (lo < hi)
+        {
+                WordEntry  *mid = lo + (hi - lo) / 2;
+                int         cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+                if (cmp > 0)
+                        hi = mid;
+                else if (cmp < 0)
+                        lo = mid + 1;
+                else
+                        return mid;
+        }
+
+        return NULL;
+}
+
+/*
+ * Stand-in position list for a word stored without position data.
+ * Users first overwrite element 0, viewed as a uint16, with
+ * lengthof(POSNULL)-1 and then read POSNULL+1 as a one-entry position array.
+ */
+static WordEntryPos    POSNULL[]={
+   {0,0},
+   {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank of document t for an AND query q.
+ *
+ * For every pair of query operands found in t, and every pair of their
+ * positions, a score sqrt(weight1 * weight2 * word_distance(dist)) is
+ * computed and folded into the result as 1 - (1 - res) * (1 - score).
+ *
+ * w - weight table consulted by wpos()
+ *
+ * Returns the accumulated rank, or -1.0 when no pair contributed.
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+   uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+   int i,k,l,p;
+   WordEntry *entry;
+   WordEntryPos    *post,*ct;
+   int4    dimt,lenct,dist;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* pos[i] stays NULL for operators and for operands that are not in t */
+   memset(pos,0,sizeof(uint16*) * q->size);
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos )
+           pos[i] = (uint16*)_POSDATAPTR(t,entry);
+       else
+           pos[i] = (uint16*)POSNULL;
+
+
+       /* a position list is a uint16 count followed by that many WordEntryPos */
+       dimt = *(uint16*)(pos[i]);
+       post = (WordEntryPos*)(pos[i]+1);
+       /* pair the current operand with every operand found before it */
+       for( k=0; k<i; k++ ) {
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l<dimt; l++) {
+               for(p=0; p<lenct; p++) {
+                   dist = abs( post[l].pos - ct[p].pos );
+                   /* equal positions only count when one side is the POSNULL stand-in */
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw; 
+                       if ( !dist ) dist=MAXENTRYPOS;  
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res; 
+}
+
+/*
+ * Rank of document t for a query q that is not a top-level AND.
+ *
+ * Every position of every query operand found in t contributes its weight
+ * wp; contributions are folded as 1 - (1 - res) * (1 - wp).
+ *
+ * w - weight table consulted by wpos()
+ *
+ * Returns the accumulated rank, or -1.0 when no operand was found in t.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           /* word stored without positions: use the one-entry stand-in list */
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * Rank of document t against query q.
+ *
+ * w      - weight table, passed on to calc_rank_and()/calc_rank_or()
+ * method - normalization: 0 none, 1 divide by log(document length),
+ *          2 divide by document length; anything else raises an error
+ *
+ * Returns 0.0 when the document or the query is empty.  A query whose first
+ * item is the '&' operator is ranked by calc_rank_and(), every other query
+ * by calc_rank_or(); a negative result from either is replaced by 1e-20.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+   int len;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   if ( res < 0 )
+       res = 1e-20;
+
+        switch(method) {
+       case 0: break;
+       case 1:
+           len = cnt_length(t);
+           /*
+            * log(1) is 0, so dividing would produce an infinite rank
+            * for a one-position document; leave such a rank as is.
+            */
+           if ( len > 1 )
+               res /= log((float)len);
+           break;
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+        }
+
+   return res;
+}
+
+/*
+ * rank(weights float4[], tsvector, query [, method int4])
+ *
+ * Ranks the document using caller-supplied weights.  The array must be
+ * one-dimensional with at least lengthof(weights) elements.  A negative
+ * element selects the built-in default for that slot; an element above 1.0
+ * raises an error.  The optional fourth argument is the normalization
+ * method handed to calc_rank().
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 ) 
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+        elog(ERROR,"Array of weight is too short");
+
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 ) 
+           elog(ERROR,"Weight out of range");
+   } 
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method); 
+       
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank_def(tsvector, query [, method int4])
+ *
+ * Same as rank() but always uses the built-in weight table.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int norm;
+   float score;
+
+   /* the normalization method is an optional third argument */
+   norm = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+
+   score = calc_rank(weights, txt, query, norm);
+
+   PG_FREE_IF_COPY(txt, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_FLOAT4(score);
+}
+
+
+/* One occurrence of a query operand in the document: the operand and its position */
+typedef struct {
+   ITEM    *item;
+   int32   pos;
+} DocRepresentation;
+
+/*
+ * qsort comparator ordering DocRepresentation entries by position.
+ * Equal positions compare as 0: qsort requires a consistent ordering, and
+ * reporting "greater" for both (a,b) and (b,a) breaks that requirement.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+/* A window of len consecutive DocRepresentation entries starting at doc */
+typedef struct {
+   DocRepresentation *doc;
+   int len;
+}  ChkDocR;
+
+/*
+ * TS_execute callback: reports whether operand val occurs among the
+ * entries of the ChkDocR window passed as checkval.
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *window = (ChkDocR*)checkval;
+   int idx;
+
+   for(idx=0; idx < window->len; idx++) {
+       if ( window->doc[idx].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Finds the next cover in doc (len entries sorted by position), searching
+ * from index *pos.
+ *
+ * Forward pass: for each query operand, the first entry at index >= *pos
+ * that refers to it is located; *q becomes the largest position among
+ * those entries and lastpos the index of that entry.  If nothing is found
+ * the function returns false; if *q is the same as on entry, *pos is
+ * advanced and the search restarts.
+ *
+ * Backward pass: for each operand, scanning from lastpos down to *pos, the
+ * first entry that refers to it is located; *p becomes the smallest
+ * position among those and f the corresponding entry.
+ *
+ * The query is then evaluated against the entries f..lastpos only.  On
+ * success the function returns true with *p/*q set and *pos moved just past
+ * f; otherwise the search restarts from the new *pos.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   /* f is only read after the backward pass has assigned it */
+   DocRepresentation   *ptr,*f=NULL;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               } 
+               break;
+           } 
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   } 
+
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+ 
+   if ( *p<=*q ) {
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { 
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ 
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q); 
+   }
+ 
+   return false;
+}
+
+/*
+ * Builds the list of all occurrences in txt of the operands of query:
+ * one DocRepresentation per (operand, position), sorted by position.
+ * A word stored without positions contributes the single POSNULL stand-in
+ * position.
+ *
+ * *doclen receives the number of entries.  Returns a palloc'd array, or
+ * NULL (with *doclen set to 0) when no operand occurs in txt.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until the positions of this word fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+   
+   if ( cur>0 ) {
+       if ( cur>1 ) 
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+   
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd(K int4, tsvector, query [, method int4])
+ *
+ * Cover-based rank: every cover found by Cover() adds 1.0 when it spans at
+ * most K positions and K/span otherwise.  A non-positive K is replaced
+ * by 4.  The optional fourth argument selects the normalization: 0 none,
+ * 1 divide by log(document length), 2 divide by document length.
+ * Returns 0.0 when no query operand occurs in the document.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len,cur;
+   int doclength;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;    
+   while( Cover(doc, len, query, &cur, &p, &q) ) 
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+        switch(method) {
+       case 0: break;
+       case 1:
+           doclength = cnt_length(txt);
+           /*
+            * log(1) is 0, so dividing would produce an infinite or NaN
+            * rank for a one-position document; leave such a rank as is.
+            */
+           if ( doclength > 1 )
+               res /= log((float)doclength);
+           break;
+       case 2: res /= (float)cnt_length(txt); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+        }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd_def(tsvector, query [, method int4])
+ *
+ * Calls rank_cd() with K = -1 (which makes rank_cd use its built-in K) and
+ * the given or default normalization method.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum   method;
+   Datum   res;
+
+   if ( PG_NARGS() == 3 )
+       method = PG_GETARG_DATUM(2);
+   else
+       method = Int32GetDatum(DEF_NORM_METHOD);
+
+   res = DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   );
+
+   PG_RETURN_DATUM(res);
+}
+
+/**************debug*************/
+
+/*
+ * One word occurrence of the document, used by get_covers():
+ * w/len   - the word text (not NUL-terminated) and its length
+ * pos     - position of this occurrence
+ * start   - number of the cover that starts at this word, 0 if none
+ * finish  - number of the cover that ends at this word, 0 if none
+ */
+typedef struct {
+   char    *w;
+   int2    len;
+   int2    pos;
+   int2    start;
+   int2    finish;
+} DocWord;
+
+/*
+ * qsort comparator ordering DocWord entries by position.
+ * Equal positions compare as 0: qsort requires a consistent ordering, and
+ * reporting "greater" for both (a,b) and (b,a) breaks that requirement.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers(tsvector, query) - debugging aid.
+ *
+ * Returns the words of the document in position order, separated by
+ * spaces, with "{N " written before the first word and "}N " after the last
+ * word of the N-th cover found by Cover().  Returns an empty text when no
+ * query operand occurs in the document, and raises an error when a word of
+ * the document has no position data.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences of the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+        dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates "word + space" for each */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark the first and the last word of every cover */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* test the bound first so that dw[dlen] is never read */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/*
+ * Node of the pointer-based query tree built by maketree().
+ * valnode points at the ITEM this node stands for; left and right are the
+ * operand subtrees and are NULL for a value node (left is also NULL for '!').
+ */
+typedef struct NODE
+{
+   struct NODE *left;
+   struct NODE *right;
+   ITEM       *valnode;
+}  NODE;
+
+/*
+ * Builds a pointer-based tree from the flat (polish notation) item array
+ * that starts at "in".  The right operand of an operator is the next item;
+ * the left operand of a binary operator is in->left items further on.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *tree = (NODE *) palloc(sizeof(NODE));
+
+   tree->valnode = in;
+   tree->left = NULL;
+   tree->right = NULL;
+
+   /* anything but an operator is a leaf */
+   if (in->type != OPR)
+       return tree;
+
+   tree->right = maketree(in + 1);
+   /* '!' is unary and has no left operand */
+   if (in->val != (int4) '!')
+       tree->left = maketree(in + in->left);
+
+   return tree;
+}
+
+/*
+ * Output buffer used while flattening a tree:
+ * ptr - item array, len - allocated items, cur - items written so far.
+ */
+typedef struct
+{
+   ITEM       *ptr;
+   int4        len;
+   int4        cur;
+}  PLAINTREE;
+
+/*
+ * Appends node and its subtrees to state in polish notation (operator,
+ * right subtree, left subtree) and frees every NODE it visits.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   /* double the buffer when it is full */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+   memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM));
+   if (node->valnode->type == VAL)
+       state->cur++;
+   else if (node->valnode->val == (int4) '!')
+   {
+       /* unary operator: its operand is the very next item */
+       state->ptr[state->cur].left = 1;
+       state->cur++;
+       plainnode(state, node->right);
+   }
+   else
+   {
+       /* remember the index, not a pointer: the buffer may move while recursing */
+       int4        cur = state->cur;
+
+       state->cur++;
+       plainnode(state, node->right);
+       /* the left operand starts right after the whole right subtree */
+       state->ptr[cur].left = state->cur - cur;
+       plainnode(state, node->left);
+   }
+   pfree(node);
+}
+
+/*
+ * Flattens the tree rooted at root into a newly allocated item array in
+ * polish notation and stores the item count in *len.  Returns NULL (and
+ * *len = 0) when root is NULL or is neither a value nor an operator.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   flat;
+
+   flat.ptr = NULL;
+   flat.cur = 0;
+   flat.len = 16;
+
+   if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       flat.ptr = (ITEM *) palloc(flat.len * sizeof(ITEM));
+       plainnode(&flat, root);
+   }
+
+   *len = flat.cur;
+   return flat.ptr;
+}
+
+/*
+ * Releases node and everything below it.  A NULL node is accepted.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive calls ignore NULL children themselves */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Removes '!' operators from the tree.
+ * This is useful for debugging; otherwise the resulting view is used
+ * for searching in an index.
+ * Operator ! always returns TRUE.
+ *
+ * Returns the cleaned subtree, or NULL when nothing remains of it.  Nodes
+ * that are dropped are freed here.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /*
+        * An OR is dropped entirely as soon as one side disappears.  When the
+        * left side already vanished the right side is not cleaned at all;
+        * freetree() releases it as it stands.
+        */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       /* AND: keep whichever side survives, or the node itself if both do */
+       NODE       *res = node;
+
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Returns a new flat item array equal to the query at ptr with all '!'
+ * operators removed (see clean_NOT_intree); *len receives its item count.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *cleaned = clean_NOT_intree(maketree(ptr));
+
+   return plaintree(cleaned, len);
+}
+
+/* Truth value of a removed subtree, as reported through *result */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Removes from the query tree the values that are always present in
+ * a text (stop words, type VALTRUE).
+ *
+ * Returns the cleaned subtree.  When the whole subtree collapses to a
+ * constant, NULL is returned and *result is set to V_TRUE or V_FALSE;
+ * otherwise *result is left untouched.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* negation of a constant is the opposite constant */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       /* OR: a true side decides the result, a false side simply drops out */
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       /* AND: a false side decides the result, a true side simply drops out */
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Returns a new flat item array equal to the query at ptr with stop-word
+ * values removed (see clean_fakeval_intree); *len receives its item count.
+ * When the whole query collapses to a constant, a NOTICE is emitted, *len is
+ * set to 0 and NULL is returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &verdict);
+
+   if (verdict == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/* qsort/bsearch comparator: orders SNMapEntry items by key using strcmp */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *lhs = (SNMapEntry*)a;
+   const SNMapEntry *rhs = (SNMapEntry*)b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Adds the pair (key, value) to map.  The key is copied with strdup; the
+ * list grows by doubling (starting at 16 slots) and is re-sorted after
+ * every insertion so that findSNMap() can use bsearch.  Raises an error
+ * when memory runs out.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if (map->len>=map->reallen) {
+       int newlen = (map->reallen) ? 2*map->reallen : 16;
+       SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newlen);
+
+       if ( !grown )
+           elog(ERROR, "No memory");
+       map->reallen=newlen;
+       map->list=grown;
+   }
+
+   slot = &( map->list[ map->len ] );
+   slot->key = strdup(key);
+   if ( ! slot->key )
+       elog(ERROR, "No memory");
+   slot->value=value;
+   map->len++;
+
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/* Same as addSNMap(), with the key given as a text value */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey;
+
+   ckey = text2char( key );
+   addSNMap(map, ckey, value);
+   pfree(ckey);
+}
+
+/*
+ * Looks key up in map.  Returns the stored value, or 0 when the map is
+ * empty or the key is not present.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe = {key, 0};
+   SNMapEntry *hit;
+
+   if ( map->len==0 || !map->list )
+       return 0;   
+
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( !hit )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Same as findSNMap(), with the key given as a text value.
+ * The result is held in an Oid rather than an int so that the value is not
+ * passed through a signed type on its way back to the caller.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Frees every key and the entry list of map, then zeroes the whole
+ * structure so that it can be reused.
+ */
+void freeSNMap( SNMap *map ) {
+   SNMapEntry *entry;
+
+   if ( map->list ) {
+       for( entry=map->list; map->len; entry++, map->len-- ) {
+           if ( entry->key )
+               free(entry->key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One map entry: a malloc'd (strdup) key string and the Oid stored for it */
+typedef struct {
+   char    *key;
+   Oid value;
+} SNMapEntry;
+
+/*
+ * Map from string to Oid, kept as an array sorted by key.
+ * len - entries in use, reallen - entries allocated, list - the array.
+ * An all-zero structure is a valid empty map.
+ */
+typedef struct {
+   int len;
+   int reallen;
+   SNMapEntry  *list;
+} SNMap;
+
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocates a stemmer environment with S_size string slots, I_size
+ * integers and B_size booleans, all zero-initialized.
+ *
+ * Returns NULL when one of the calloc calls fails; everything allocated up
+ * to that point is released through SN_close_env(), which only looks at
+ * the *_size fields that have already been set.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    SN_close_env(z);
+    return NULL;
+}
+
+/*
+ * Releases an environment created by SN_create_env: the string slots,
+ * the integer and boolean arrays, the current string and the environment
+ * itself.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    if (z->S_size)
+    {
+        int slot = 0;
+        while (slot < z->S_size)
+        {   lose_s(z->S[slot]);
+            slot++;
+        }
+        free(z->S);
+    }
+    if (z->I_size)
+        free(z->I);
+    if (z->B_size)
+        free(z->B);
+    if (z->p)
+        lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Makes the "size" symbols at s the current string of z: the range
+ * 0 .. z->l of the existing string is replaced through replace_s(), then the
+ * cursor z->c is reset to 0.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+struct SN_env {
+    symbol * p;
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;
+    symbol * * S;
+    int * I;
+    symbol * B;
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+/* Forward declarations: the public entry point english_stem(), the static
+ * routines it is built from, and the environment create/close wrappers. */
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+/* a_0: the single string "gener", looked up with find_among() in
+ * r_mark_regions. Each entry follows struct among in header.h:
+ * { s_size, s, substring_i, result, function }. */
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+/* a_1: endings looked up backwards by r_Step_1a; the result field selects
+ * the case taken in that routine's switch (-1 matches no case). */
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+/* a_2: endings checked by r_Step_1b after it has deleted a suffix. Entry 0
+ * is the empty string (result 3); "at", "bl" and "iz" give result 1; the
+ * doubled letters give result 2. */
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+/* a_3: suffixes looked up backwards at the start of r_Step_1b. "eed" and
+ * "eedly" give result 1; "ed", "ing", "edly" and "ingly" give result 2. */
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+/* a_4: suffixes looked up backwards by r_Step_2; the result field (1..16)
+ * selects the replacement performed in that routine's switch. */
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+/* a_5: suffixes looked up backwards by r_Step_3; the result field (1..6)
+ * selects the case taken in that routine's switch. */
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+/* a_6: suffixes looked up backwards by r_Step_4. Every entry gives result 1
+ * except "ion", which gives result 2. */
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+/* a_7: final letters looked up backwards by r_Step_5: "e" (result 1) and "l" (result 2). */
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+/* a_8: words looked up backwards by r_exception2; all entries carry result -1. */
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+/* a_9: words looked up forwards by r_exception1. Results 1..11 select a
+ * replacement in that routine's switch; result -1 matches no case there. */
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+/* Character-class tables passed to the in_grouping/out_grouping helpers
+ * together with a min and max symbol code: g_v with 97..121, g_v_WXY with
+ * 89..121, g_valid_LI with 99..116.
+ * NOTE(review): presumably bitmaps indexed from min, which would make g_v
+ * the letters a e i o u y, g_v_WXY those plus Y w x, and g_valid_LI
+ * c d e g h k m n r t -- confirm against the helper implementation. */
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+/* Literal strings referenced by the routines below, either as text to match
+ * (eq_s / eq_s_b) or as replacement/insert text (slice_from_s / insert_s).
+ * None is NUL-terminated; the length is passed separately at each call. */
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/* Prelude: rewrites selected 'y' symbols as 'Y' and records whether any
+ * rewrite happened in the Y_found flag (z->B[0]).
+ *
+ *  - First "do": if the text at the cursor is 'y' and the symbol after it
+ *    passes in_grouping(g_v), that 'y' is replaced by 'Y'.
+ *  - Second "do": repeatedly scans forward for a symbol passing
+ *    in_grouping(g_v) that is immediately followed by 'y', and replaces
+ *    that 'y' by 'Y'; the loop ends when the scan reaches the limit z->l.
+ *
+ * The cursor is restored to its entry value after each "do".
+ * Always returns 1.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c_do = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c_repeat = z->c; /* cursor at the start of this repetition */
+            while(1) { /* goto, line 26 */
+                int c_goto = z->c; /* candidate position being tested */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c_goto;
+                break;
+            lab3:
+                z->c = c_goto;
+                if (z->c >= z->l) goto lab2; /* limit reached: no further match */
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c_repeat;
+            break;
+        }
+        z->c = c_do;
+    }
+    return 1;
+}
+/* Computes the marks p1 (z->I[0]) and p2 (z->I[1]).
+ *
+ * Both marks start at the limit z->l. p1 is set to the cursor after either
+ * a match of a_0 ("gener") or a "gopast" of an in_grouping(g_v) symbol
+ * followed by a "gopast" of an out_grouping(g_v) symbol. p2 is set after a
+ * further in/out pair. If the limit is reached during any scan, the marks
+ * not yet assigned keep the value z->l.
+ *
+ * The cursor is restored before returning. Always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+/* Backward test used by r_Step_1b and r_Step_5. Succeeds (returns 1) when,
+ * reading backwards from the cursor, either
+ *   - out_grouping_b(g_v_WXY), in_grouping_b(g_v) and out_grouping_b(g_v)
+ *     all succeed in that order, or
+ *   - out_grouping_b(g_v) and in_grouping_b(g_v) succeed and the cursor is
+ *     then at the backward limit z->lb.
+ * Returns 0 otherwise.
+ */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m; /* rewind before trying the second alternative */
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* R1 test: returns 1 when mark p1 (z->I[0]) is at or before the cursor, 0 otherwise. */
+static int r_R1(struct SN_env * z) {
+    const int p1 = z->I[0];
+    return (p1 <= z->c) ? 1 : 0;
+}
+
+/* R2 test: returns 1 when mark p2 (z->I[1]) is at or before the cursor, 0 otherwise. */
+static int r_R2(struct SN_env * z) {
+    const int p2 = z->I[1];
+    return (p2 <= z->c) ? 1 : 0;
+}
+/* Step 1a: handles the endings in a_1, working backwards from the cursor.
+ *
+ *   result 1 ("sses")       -> slice replaced by "ss".
+ *   result 2 ("ied", "ies") -> replaced by "ie" when exactly one symbol lies
+ *                              between the backward limit and the suffix,
+ *                              otherwise by "i".
+ *   result 3 ("s")          -> deleted, provided an in_grouping_b(g_v)
+ *                              symbol is found before the symbol that
+ *                              immediately precedes the "s".
+ *   result -1 ("ss", "us")  -> no case matches; nothing is changed.
+ *
+ * Returns 0 when no ending matches or a condition above fails, else 1.
+ */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+/* Step 1b: handles the endings in a_3, working backwards from the cursor.
+ *
+ *   result 1 ("eed", "eedly"): if the R1 test passes, the slice is replaced
+ *       by "ee".
+ *   result 2 ("ed", "ing", "edly", "ingly"): requires an in_grouping_b(g_v)
+ *       symbol somewhere before the suffix (tested, then the cursor is
+ *       restored). The suffix is deleted and the new ending is looked up in
+ *       a_2:
+ *         1 ("at", "bl", "iz") -> "e" is inserted at the cursor;
+ *         2 (doubled letter)   -> the last symbol is deleted;
+ *         3 (empty string)     -> "e" is inserted if the cursor is at mark
+ *                                 p1 (z->I[0]) and r_shortv succeeds.
+ *
+ * Returns 0 when no ending matches or a required test fails, else 1.
+ */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+/* Step 1c: replaces a final 'y' or 'Y' by 'i' when the symbol before it
+ * passes out_grouping_b(g_v) and, after stepping over that symbol, the
+ * cursor is not at the backward limit z->lb.
+ * Returns 1 when the replacement is made, 0 otherwise.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+/* Step 2: looks up a suffix from a_4 backwards and, if the R1 test passes,
+ * replaces it according to the table result:
+ *    1 "tional" -> "tion"          2 "enci" -> "ence"
+ *    3 "anci" -> "ance"            4 "abli" -> "able"
+ *    5 "entli" -> "ent"            6 "izer", "ization" -> "ize"
+ *    7 "ational", "ation", "ator" -> "ate"
+ *    8 "alism", "aliti", "alli" -> "al"
+ *    9 "fulness" -> "ful"         10 "ousli", "ousness" -> "ous"
+ *   11 "iveness", "iviti" -> "ive"
+ *   12 "biliti", "bli" -> "ble"
+ *   13 "ogi" -> "og", only when preceded by 'l'
+ *   14 "fulli" -> "ful"           15 "lessli" -> "less"
+ *   16 "li" -> deleted, only when the preceding symbol passes
+ *      in_grouping_b(g_valid_LI)
+ * Returns 0 when nothing matches or a required test fails, else 1.
+ */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+/* Step 3: looks up a suffix from a_5 backwards and, if the R1 test passes,
+ * acts on the table result:
+ *   1 "tional" -> "tion"       2 "ational" -> "ate"
+ *   3 "alize" -> "al"          4 "icate", "iciti", "ical" -> "ic"
+ *   5 "ful", "ness" -> deleted
+ *   6 "ative" -> deleted, only when the R2 test also passes
+ * Returns 0 when nothing matches or a required test fails, else 1.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+/* Step 4: looks up a suffix from a_6 backwards and, if the R2 test passes,
+ * deletes it. For "ion" (result 2) the deletion additionally requires the
+ * preceding symbol to be 's' or 't'.
+ * Returns 0 when nothing matches or a required test fails, else 1.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+/* Step 5: handles a final "e" or "l" (table a_7), working backwards.
+ *   result 1 ("e"): deleted when the R2 test passes, or when the R1 test
+ *                   passes and r_shortv fails.
+ *   result 2 ("l"): deleted when the R2 test passes and the preceding
+ *                   symbol is 'l'.
+ * Returns 0 when nothing matches or a required test fails, else 1.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/* Second exception list: returns 1 when a backward lookup in a_8 matches
+ * and the cursor is then at the backward limit z->lb; returns 0 otherwise.
+ * The slice marks ket/bra are set around the lookup. */
+static int r_exception2(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c; /* [, line 147 */
+    found = find_among_b(z, a_8, 8); /* substring, line 147 */
+    if (found == 0) {
+        return 0;
+    }
+    z->bra = z->c; /* ], line 147 */
+    return (z->c <= z->lb) ? 1 : 0; /* atlimit, line 147 */
+}
+/* First exception list: a forward lookup in a_9 that must end exactly at
+ * the limit z->l. On a match the table result selects a replacement:
+ *    1 "skis" -> "ski"       2 "skies" -> "sky"     3 "dying" -> "die"
+ *    4 "lying" -> "lie"      5 "tying" -> "tie"     6 "idly" -> "idl"
+ *    7 "gently" -> "gentl"   8 "ugly" -> "ugli"     9 "early" -> "earli"
+ *   10 "only" -> "onli"     11 "singly" -> "singl"
+ * Entries with result -1 ("andes", "atlas", "bias", "cosmos", "howe",
+ * "news", "sky") match no case and are left unchanged.
+ * Returns 1 on a match, 0 otherwise.
+ */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+/* Postlude: when the Y_found flag (z->B[0]) is set, repeatedly scans
+ * forward for 'Y' and replaces each one by 'y', stopping once the scan
+ * reaches the limit z->l.
+ * Returns 0 immediately when Y_found is not set, otherwise 1.
+ */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+/* Entry point: stems the word held in z.
+ *
+ * 1. If r_exception1 matches, nothing more is done.
+ * 2. Otherwise, returns 0 without further work when fewer than three
+ *    symbols lie between the cursor and the limit (the "hop 3" test).
+ * 3. Runs r_prelude and r_mark_regions, then switches to backward mode
+ *    (lb = cursor, cursor = limit).
+ * 4. Runs r_Step_1a; then, unless r_exception2 matches, runs Steps 1b, 1c,
+ *    2, 3, 4 and 5. Each step is a "do": its result is ignored and the
+ *    cursor is restored afterwards.
+ * 5. Moves the cursor back to lb and runs r_postlude.
+ *
+ * Returns 1 in every case except the early exit in step 2.
+ */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb; /* leave backward mode: cursor returns to the start */
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/* Creates the environment used by english_stem(): SN_create_env() is asked
+ * for 0 S entries, 2 I entries (marks p1, p2) and 1 B entry (Y_found). */
+extern struct SN_env * english_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 1);
+    return env;
+}
+
+/* Disposes of an environment by handing it to SN_close_env(). */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+/* Public interface of the English stemmer: create an environment, stem the word held in it, release it. */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+extern int english_stem(struct SN_env * z); /* returns 1, or 0 when the word has fewer than three symbols */
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h> /* INT_MAX / INT_MIN, used by MAXINT / MININT below */
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Size in bytes of the two-int header kept in front of a symbol string.
+   Parenthesised so the macro expands safely inside larger expressions. */
+#define HEAD (2*sizeof(int))
+
+/* Accessors for the header ints located immediately before string p:
+   index -1 holds the size, index -2 the capacity. */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) ((int *)(p))[-1] = (n)
+#define CAPACITY(p)    ((int *)(p))[-2]
+/* One entry of a search table consulted by find_among / find_among_b. */
+struct among
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    int (* function)(struct SN_env *); /* optional routine; 0 in every table in this section */
+};
+/* Runtime helpers used by the generated stemmers. Only the prototypes are given here; the definitions live in another file. */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public stemming entry point, defined at the end of this file. */
+extern int russian_stem(struct SN_env * z);
+/* File-local routines, declared up front because they are defined below
+   in a different order from the one in which they are called.  Each
+   returns 1 on success and 0 on failure. */
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Environment constructor / destructor for this stemmer. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Suffix table searched backwards by r_perfective_gerund.
+   Each a_0 entry is initialised as
+     { suffix length, suffix symbols, fallback index, result code, function }
+   where the fallback index names a shorter entry to try next (-1 for none)
+   and function is 0 throughout this file.  In r_perfective_gerund result 1
+   additionally requires a preceding symbol 193 or 209, result 2 does not.
+   NOTE(review): the symbol values look like KOI8-R byte codes - confirm. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Suffix table searched backwards by r_adjective.  Every entry has result
+   code 1 and no fallback entry; r_adjective deletes whichever suffix
+   matched.  Entry layout:
+     { suffix length, suffix symbols, fallback index, result code, function } */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Suffix table searched backwards by r_adjectival after r_adjective has
+   succeeded.  Result 1 is only deleted when preceded by symbol 193 or 209;
+   result 2 is deleted unconditionally.  Entry layout:
+     { suffix length, suffix symbols, fallback index, result code, function } */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* Two-entry suffix table searched backwards by r_reflexive; either match
+   (result 1) is deleted.  Entry layout:
+     { suffix length, suffix symbols, fallback index, result code, function } */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Suffix table searched backwards by r_verb.  Result 1 is only deleted
+   when preceded by symbol 193 or 209; result 2 is deleted unconditionally.
+   Entry layout:
+     { suffix length, suffix symbols, fallback index, result code, function } */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Suffix table searched backwards by r_noun.  Every entry has result code
+   1; r_noun deletes whichever suffix matched.  Entry layout:
+     { suffix length, suffix symbols, fallback index, result code, function } */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* Two-entry suffix table searched backwards by r_derivational; a match is
+   deleted only when it also passes the r_R2 region test.  Entry layout:
+     { suffix length, suffix symbols, fallback index, result code, function } */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Suffix table searched backwards by r_tidy_up.  The result code selects
+   one of three clean-up actions there (1, 2 or 3).  Entry layout:
+     { suffix length, suffix symbols, fallback index, result code, function } */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Bitmap handed to in_grouping / out_grouping by r_mark_regions with the
+   code range 192..220: bit (code - 192) set means the code belongs to the
+   group.  NOTE(review): presumably the vowel set, given the pV mark it is
+   used to compute - confirm against the Snowball source. */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* One-symbol literals tested with eq_s_b:
+   s_0/s_1 in r_perfective_gerund, s_2/s_3 in r_adjectival, s_4/s_5 in
+   r_verb, s_6..s_8 in r_tidy_up and s_9 in russian_stem.  The generator
+   emits one array per use, hence the repeated values. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the two region marks kept in z->I[0] (pV) and z->I[1] (p2).
+   Both start out at the end of the word (z->l).  Scanning forward from the
+   cursor, pV is set just past the first g_v member; p2 is set after then
+   skipping a non-member, a member and another non-member.  If the word
+   ends during any of these scans the marks not yet reached keep the value
+   z->l.  The cursor is restored before returning; the result is always 1. */
+static int r_mark_regions(struct SN_env * z) {
+    int start = z->c;
+
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+
+    /* go past the first group member */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    z->I[0] = z->c; /* pV */
+
+    /* go past the next non-member */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    /* go past the next member */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    /* go past the next non-member */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    z->I[1] = z->c; /* p2 */
+
+restore:
+    z->c = start;
+    return 1;
+}
+
+/* Region test: 1 when the cursor is at or beyond the p2 mark stored in
+   z->I[1], 0 otherwise. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/* Remove a suffix listed in a_0 from the text before the cursor.
+   Result-1 suffixes are removed only when preceded by symbol s_0 or s_1;
+   result-2 suffixes are removed unconditionally.
+   Returns 1 when a suffix was removed, 0 otherwise. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int code;
+
+    z->ket = z->c;
+    code = find_among_b(z, a_0, 9);
+    if (code == 0) return 0;
+    z->bra = z->c;
+
+    if (code == 1) {
+        int mark = z->l - z->c;
+        if (!eq_s_b(z, 1, s_0)) {
+            /* first alternative failed: rewind and try the second one */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z);
+    } else if (code == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Remove a suffix listed in a_1 from the text before the cursor.
+   Returns 1 when a table entry matched, 0 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int code;
+
+    z->ket = z->c;
+    code = find_among_b(z, a_1, 26);
+    if (code == 0) return 0;
+    z->bra = z->c;
+
+    if (code == 1) slice_del(z);
+    return 1;
+}
+
+/* Remove an adjective suffix (r_adjective) and then, optionally, one of the
+   suffixes listed in a_2 that precedes it.
+
+   The second step is a "try": when it cannot be completed the cursor must
+   go back to where the try started.  The generated code used the name m
+   both for the try mark and for the mark of the nested "or", so the last
+   failure path restored the cursor from the inner (shadowing) mark instead
+   of the try mark.  The two marks now have distinct names.
+
+   Returns 0 when r_adjective fails, 1 otherwise. */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m_keep = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m_keep; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m_keep; goto lab0; }
+            case 1:
+                {   int m1 = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m1;
+                    /* neither alternative matched: abandon the whole try */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_keep; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Remove one of the two suffixes listed in a_3 from the text before the
+   cursor.  Returns 1 when a table entry matched, 0 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int code;
+
+    z->ket = z->c;
+    code = find_among_b(z, a_3, 2);
+    if (code == 0) return 0;
+    z->bra = z->c;
+
+    if (code == 1) slice_del(z);
+    return 1;
+}
+
+/* Remove a suffix listed in a_4 from the text before the cursor.
+   Result-1 suffixes are removed only when preceded by symbol s_4 or s_5;
+   result-2 suffixes are removed unconditionally.
+   Returns 1 when a suffix was removed, 0 otherwise. */
+static int r_verb(struct SN_env * z) {
+    int code;
+
+    z->ket = z->c;
+    code = find_among_b(z, a_4, 46);
+    if (code == 0) return 0;
+    z->bra = z->c;
+
+    if (code == 1) {
+        int mark = z->l - z->c;
+        if (!eq_s_b(z, 1, s_4)) {
+            /* first alternative failed: rewind and try the second one */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z);
+    } else if (code == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Remove a suffix listed in a_5 from the text before the cursor.
+   Returns 1 when a table entry matched, 0 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int code;
+
+    z->ket = z->c;
+    code = find_among_b(z, a_5, 36);
+    if (code == 0) return 0;
+    z->bra = z->c;
+
+    if (code == 1) slice_del(z);
+    return 1;
+}
+
+/* Remove a suffix listed in a_6, provided the suffix starts at or after the
+   p2 mark (r_R2).  Returns 1 when a table entry matched inside that region,
+   0 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int code;
+
+    z->ket = z->c;
+    code = find_among_b(z, a_6, 2);
+    if (code == 0) return 0;
+    z->bra = z->c;
+
+    if (!r_R2(z)) return 0;
+
+    if (code == 1) slice_del(z);
+    return 1;
+}
+
+/* Final clean-up driven by table a_7:
+     result 1 - delete the matched suffix, then require two consecutive
+                symbols 206 before the cursor and delete the one nearer
+                the end;
+     result 2 - the match is a single symbol 206; delete it when another
+                206 precedes it;
+     result 3 - delete the matched symbol.
+   Returns 1 when the selected action completed, 0 otherwise. */
+static int r_tidy_up(struct SN_env * z) {
+    int code;
+
+    z->ket = z->c;
+    code = find_among_b(z, a_7, 4);
+    if (code == 0) return 0;
+    z->bra = z->c;
+
+    if (code == 1) {
+        slice_del(z);
+        z->ket = z->c;
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c;
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z);
+    } else if (code == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z);
+    } else if (code == 3) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Stem the word held in z in place.
+
+   The region marks are computed first; the remaining work runs backwards
+   from the end of the word with the backward limit z->lb temporarily moved
+   to the pV mark (z->I[0]):
+     1. remove a perfective gerund suffix, or else optionally a reflexive
+        suffix followed by the first that applies of adjectival / verb /
+        noun suffix;
+     2. optionally remove a trailing symbol s_9;
+     3. attempt the derivational suffix;
+     4. attempt the tidy-up step.
+   Cursor positions are remembered as distances from z->l because deletions
+   shorten the word.  Returns 0 if the cursor lies before pV when the limit
+   is installed, 1 otherwise. */
+extern int russian_stem(struct SN_env * z) {
+    int saved_lb;
+    int from_end;
+
+    {   int start = z->c;
+        (void) r_mark_regions(z);
+        z->c = start;
+    }
+
+    /* switch to backward processing */
+    z->lb = z->c;
+    z->c = z->l;
+
+    /* install pV as the backward limit */
+    from_end = z->l - z->c;
+    if (z->c < z->I[0]) return 0;
+    z->c = z->I[0];
+    saved_lb = z->lb;
+    z->lb = z->c;
+    z->c = z->l - from_end;
+
+    /* step 1 */
+    from_end = z->l - z->c;
+    if (!r_perfective_gerund(z)) {
+        int alt;
+
+        z->c = z->l - from_end;
+
+        alt = z->l - z->c;
+        if (!r_reflexive(z)) z->c = z->l - alt;
+
+        alt = z->l - z->c;
+        if (!r_adjectival(z)) {
+            z->c = z->l - alt;
+            if (!r_verb(z)) {
+                z->c = z->l - alt;
+                (void) r_noun(z);
+            }
+        }
+    }
+    z->c = z->l - from_end;
+
+    /* step 2 */
+    from_end = z->l - z->c;
+    z->ket = z->c;
+    if (eq_s_b(z, 1, s_9)) {
+        z->bra = z->c;
+        slice_del(z);
+    } else {
+        z->c = z->l - from_end;
+    }
+
+    /* step 3 */
+    from_end = z->l - z->c;
+    (void) r_derivational(z);
+    z->c = z->l - from_end;
+
+    /* step 4 */
+    from_end = z->l - z->c;
+    (void) r_tidy_up(z);
+    z->c = z->l - from_end;
+
+    /* remove the limit and return to the start of the word */
+    z->lb = saved_lb;
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create the environment used by russian_stem.
+   NOTE(review): the middle argument presumably requests the two integer
+   slots z->I[0] and z->I[1] used in this file - confirm against
+   SN_create_env. */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Release an environment obtained from russian_create_env. */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create / destroy the environment that russian_stem operates on. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Stem the word currently held in z, in place. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* unless(C) stmt;  runs stmt when C is false. */
+#define unless(C) if(!(C))
+
+/* Capacity, in symbols, of a buffer freshly returned by create_s. */
+#define CREATE_SIZE 1
+
+/* Allocate a symbol buffer with capacity CREATE_SIZE.
+
+   The allocation holds a HEAD-byte header followed by the symbols (plus
+   one spare); the returned pointer addresses the first symbol, and both
+   the capacity and the size header are set to CREATE_SIZE.
+
+   Returns NULL when the allocation fails.  Previously the failed malloc
+   result was offset by HEAD and written through. */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL) return NULL;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Release a buffer obtained from create_s or increase_size.
+   A NULL pointer is ignored; without the guard the HEAD offset would be
+   subtracted from NULL and the bogus address passed to free. */
+extern void lose_s(symbol * p)
+{   if (p == NULL) return;
+    free((char *) p - HEAD);
+}
+
+/* Forward group test.  Succeeds when the symbol at the cursor has a code in
+   min..max whose bit (code - min) is set in the bitmap s; the cursor then
+   advances by one and 1 is returned.  Returns 0, cursor untouched, at the
+   end of the text (z->l) or when the symbol is not in the group. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int code;
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code > max) return 0;
+    code -= min;
+    if (code < 0) return 0;
+    if ((s[code >> 3] & (0X1 << (code & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward group test.  Succeeds when the symbol just before the cursor has
+   a code in min..max whose bit (code - min) is set in the bitmap s; the
+   cursor then moves back by one and 1 is returned.  Returns 0, cursor
+   untouched, at the backward limit (z->lb) or when the symbol is not in the
+   group. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int code;
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code > max) return 0;
+    code -= min;
+    if (code < 0) return 0;
+    if ((s[code >> 3] & (0X1 << (code & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward complement of in_grouping.  Succeeds when the symbol at the
+   cursor is NOT in the group described by s / min / max; the cursor then
+   advances by one and 1 is returned.  Returns 0, cursor untouched, at the
+   end of the text or when the symbol is in the group. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int code;
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code <= max)
+    {   code -= min;
+        if (code >= 0 && (s[code >> 3] & (0X1 << (code & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward complement of in_grouping_b.  Succeeds when the symbol just
+   before the cursor is NOT in the group described by s / min / max; the
+   cursor then moves back by one and 1 is returned.  Returns 0, cursor
+   untouched, at the backward limit or when the symbol is in the group. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int code;
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code <= max)
+    {   code -= min;
+        if (code >= 0 && (s[code >> 3] & (0X1 << (code & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward range test: when the symbol at the cursor has a code in min..max,
+   advance the cursor by one and return 1; otherwise (or at the end of the
+   text) return 0 and leave the cursor alone. */
+extern int in_range(struct SN_env * z, int min, int max)
+{   int code;
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (!(min <= code && code <= max)) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward range test: when the symbol just before the cursor has a code in
+   min..max, move the cursor back by one and return 1; otherwise (or at the
+   backward limit) return 0 and leave the cursor alone. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{   int code;
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (!(min <= code && code <= max)) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward complement of in_range: when the symbol at the cursor has a code
+   outside min..max, advance the cursor by one and return 1; otherwise (or
+   at the end of the text) return 0 and leave the cursor alone. */
+extern int out_range(struct SN_env * z, int min, int max)
+{   int code;
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (min <= code && code <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward complement of in_range_b: when the symbol just before the cursor
+   has a code outside min..max, move the cursor back by one and return 1;
+   otherwise (or at the backward limit) return 0 and leave the cursor
+   alone. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{   int code;
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (min <= code && code <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward literal match: when the s_size symbols starting at the cursor
+   equal s, step the cursor over them and return 1.  Returns 0, cursor
+   untouched, when fewer than s_size symbols remain before z->l or the
+   symbols differ. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{   if (z->l - z->c < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward literal match: when the s_size symbols ending at the cursor
+   equal s, move the cursor back over them and return 1.  Returns 0, cursor
+   untouched, when fewer than s_size symbols lie between z->lb and the
+   cursor or the symbols differ. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{   if (z->c - z->lb < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* eq_s with the literal taken from the symbol buffer p; its length is
+   SIZE(p). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{   int len = SIZE(p);
+    return eq_s(z, len, p);
+}
+
+/* eq_s_b with the literal taken from the symbol buffer p; its length is
+   SIZE(p). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{   int len = SIZE(p);
+    return eq_s_b(z, len, p);
+}
+
+/* Look up the text following the cursor in the table v of v_size entries.
+
+   Phase 1 is a binary search over v (presumably sorted by key - the
+   bisection relies on it).  common_i / common_j record how many leading
+   symbols are already known to agree with the entries at the lower and
+   upper bound, so each probe resumes comparing at the smaller of the two.
+
+   Phase 2 starts at the entry the search settled on and follows the
+   substring_i links until it finds an entry that matched in full.  The
+   cursor is placed just past that entry; if the entry has a condition
+   function it must also return non-zero.
+
+   Returns the result code of the accepted entry, or 0 when the chain is
+   exhausted.  In the 0 case the cursor is left wherever the last rejected
+   candidate put it. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; } /* text ran out: sorts before the key */
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size) /* the whole key of entry i matched */
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c + w->s_size; /* undo any cursor movement made by the condition */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i; /* fall back to a shorter candidate */
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/* Backward counterpart of find_among: look up the text that ends at the
+   cursor in the table v of v_size entries.
+
+   Keys are compared from their last symbol towards their first against the
+   symbols preceding the cursor, stopping at the backward limit z->lb.  The
+   binary search and the walk along the substring_i links work as in
+   find_among; on acceptance the cursor is placed at the start of the
+   matched text (c - s_size).
+
+   Returns the result code of the accepted entry, or 0 when none is
+   accepted. */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; } /* hit the backward limit */
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size) /* the whole key of entry i matched */
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c - w->s_size; /* undo any cursor movement made by the condition */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i; /* fall back to a shorter candidate */
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Replace the buffer p by a larger one.
+
+   The new buffer has capacity n + 20 and receives the first CAPACITY(p)
+   symbols of p together with p's size header; p itself is released, so
+   the caller must continue with the returned pointer.  n is expected to be
+   at least CAPACITY(p), as it is for every caller in this file.
+
+   Two defects of the original are addressed:
+     - the malloc result was used unchecked.  None of the callers in this
+       file can handle a NULL return, so an allocation failure is reported
+       and the process terminated, in the same way slice_check treats an
+       unrecoverable internal error;
+     - the size header of the new buffer was left uninitialised. */
+extern symbol * increase_size(symbol * p, int n)
+{   int new_size = n + 20;
+    symbol * q;
+    void * mem = malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    if (mem == NULL)
+    {   fprintf(stderr, "out of memory in increase_size\n");
+        exit(1);
+    }
+    q = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(q) = new_size;
+    SET_SIZE(q, SIZE(p));
+    memmove(q, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return q;
+}
+
+/* to replace symbols between c_bra and c_ket in z->p by the
+   s_size symbols at s
+*/
+
+/* Replace the symbols c_bra..c_ket of z->p by the s_size symbols at s.
+
+   When the replacement changes the length, the buffer is grown if needed,
+   the tail after c_ket is shifted, and the size header and z->l are
+   adjusted.  A cursor at or after c_ket moves with the tail; a cursor
+   strictly inside the replaced span is moved to c_bra.
+
+   Returns the change in length (new length minus old length). */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{   int old_len = SIZE(z->p);
+    int delta = s_size - (c_ket - c_bra);
+    if (delta != 0)
+    {   int new_len = delta + old_len;
+        if (new_len > CAPACITY(z->p)) z->p = increase_size(z->p, new_len);
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        if (z->c >= c_ket)
+            z->c += delta;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/* Sanity check before a slice operation: requires
+   0 <= bra <= ket <= l <= SIZE(p).  On violation a message and a dump of
+   the environment are printed and the process exits with status 1. */
+static void slice_check(struct SN_env * z)
+{
+    if (z->bra < 0 ||
+        z->bra > z->ket ||
+        z->ket > z->l ||
+        z->l > SIZE(z->p))   /* this last test could be removed */
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+/* Replace the current slice (z->bra..z->ket) by the s_size symbols at s,
+   after validating the slice bounds. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the contents of the symbol buffer p
+   (SIZE(p) symbols). */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    slice_from_s(z, len, p);
+}
+
+/* Delete the current slice, i.e. replace it by an empty string. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, 0, 0);
+}
+
+/* Replace the symbols bra..ket by the s_size symbols at s and shift the
+   slice marks z->bra / z->ket by the change in length when they lie at or
+   after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* insert_s with the replacement text taken from the symbol buffer p
+   (SIZE(p) symbols). */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice (z->bra..z->ket) into p, growing p when its
+   capacity is too small, and set p's size to the slice length.  Returns p,
+   which may differ from the pointer passed in. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{   int len;
+    slice_check(z);
+    len = z->ket - z->bra;
+    if (len > CAPACITY(p)) p = increase_size(p, len);
+    memmove(p, z->p + z->bra, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into p, growing p when its capacity
+   is too small, and set p's size accordingly.  Returns p, which may differ
+   from the pointer passed in. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{   int count = z->l;
+    if (count > CAPACITY(p)) p = increase_size(p, count);
+    memmove(p, z->p, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/* Dump the environment's text to stdout with position markers:
+     {  backward limit (lb)     [  slice start (bra)     |  cursor (c)
+     ]  slice end (ket)         }  forward limit (l)
+   Zero symbols are shown as '#'.  When number >= 0 the dump is prefixed by
+   number, line_count and the buffer size; the closing quote and newline
+   are always printed. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{   int pos;
+    int size = SIZE(z->p);
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,size);
+    for (pos = 0; pos <= size; pos++)
+    {   if (pos == z->lb) printf("{");
+        if (pos == z->bra) printf("[");
+        if (pos == z->c) printf("|");
+        if (pos == z->ket) printf("]");
+        if (pos == z->l) printf("}");
+        if (pos < size)
+        {   int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- First, load the tsearch2 definitions.  Turn off echoing so that the
+-- expected file does not depend on the contents of tsearch2.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case the NUL-terminated string str in place, one byte at a time
+ * via tolower(), and return str.
+ */
+char*
+lowerstr(char *str) {
+   char *cur;
+
+   for ( cur=str; *cur!='\0'; cur++ )
+       *cur = tolower(*(unsigned char*)cur);
+   return str;
+}
+
+/*
+ * Release every word of the stop list, then the pointer array itself, and
+ * finally zero the whole StopList structure (including wordop).
+ *
+ * The array is freed exactly once, after the words, and also when the list
+ * holds no words.  The length is tested before *ptr is read so the loop
+ * never looks past the last stored entry.
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       while( s->len >0 && *ptr ) {
+           free(*ptr);
+           ptr++; s->len--;
+       }
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Read a stop-word list, one word per line, from the file named by 'in'
+ * and store it in s->stop / s->len.  Empty lines are skipped and, when
+ * s->wordop is set, each word is passed through it before being stored.
+ * A NULL or empty 'in' yields an empty list (s->stop == NULL, s->len == 0).
+ *
+ * Raises ERROR if the file cannot be opened or memory runs out.
+ *
+ * Only a real trailing '\n' is stripped, so a final line without a newline
+ * (or a line longer than the buffer) keeps its last character.  On the
+ * out-of-memory path the words collected so far live only in the local
+ * array, so they are released here before freestoplist() resets *s.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+       bool    nomem=false;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t buflen=strlen(buf);
+
+           if ( buflen>0 && buf[buflen-1]=='\n' )
+               buf[buflen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   nomem=true;
+                   break;
+               }
+               stop=tmp;
+           }
+
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               nomem=true;
+               break;
+           }
+           if ( s->wordop )
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++;
+       }
+       fclose(hin);
+
+       if ( nomem ) {
+           /* free the words gathered so far, then the array itself */
+           while( s->len>0 )
+               free(stop[--(s->len)]);
+           free(stop);
+           /* nothing left for freestoplist() to free; it only resets *s */
+           s->stop=NULL;
+           freestoplist(s);
+           elog(ERROR,"Not enough memory");
+       }
+       pfree(filename);
+   }
+   s->stop=stop;
+}
+
+/*
+ * qsort()/bsearch() comparator: a and b each point to a char* element;
+ * the strings they refer to are ordered with strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   char *lhs = *(char**)a;
+   char *rhs = *(char**)b;
+
+   return strcmp(lhs, rhs);
+}
+
+/* Sort the stop words with comparestr(); an empty list is left untouched. */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is found in the stop list by binary search (the
+ * list must already be ordered as comparestr() orders it).  When s->wordop
+ * is set, key is passed through it first and the result is what is looked
+ * up.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop )
+       key=(*(s->wordop))(key);
+   if ( !s->stop || s->len<=0 )
+       return false;
+   if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
+       return true;
+   return false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the configuration whose pg_ts_cfg row has oid 'id'.
+ *
+ * Two SPI queries are run.  The first fetches the parser name (prs_name).
+ * The second returns, for each token type of that parser, the array of
+ * dictionary names mapped to it, ordered by token id descending, so the
+ * first row fixes the size of cfg->map.  Names are copied into
+ * TopMemoryContext while SPI is connected and are resolved to ids
+ * (name2id_prs / name2id_dict) only after SPI_finish().
+ *
+ * cfg->map and every dict_id array are malloc'ed.  Errors are reported
+ * through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer(
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
+       );
+       /* the copy must outlive SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+
+       cfg->id=id;
+   } else
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /* rows arrive with the largest token id first: size the map once */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull now describes column 2, the dictionary-name array */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* keep the dictionary names until they are resolved below */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       }
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a )
+           pfree(a);
+   }
+
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every saved dictionary name with the dictionary's id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Cache of the configurations loaded so far (used by findcfg, reset_cfg
+ * and name2id_cfg in this file).
+ */
+typedef struct {
+   /* entry returned by the most recent findcfg() lookup, or NULL */
+   TSCfgInfo   *last_cfg;
+   /* number of entries of list[] in use */
+   int     len;
+   /* number of entries list[] has room for */
+   int     reallen;
+   /* array of loaded configurations; findcfg() keeps it sorted by id */
+   TSCfgInfo   *list;
+   /* configuration name -> id lookup used by name2id_cfg() */
+   SNMap       name2id_map;
+} CFGList;
+
+/* the single cache instance, initially empty */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name->id map, every per-token
+ * dict_id array, every map array and the list itself, then zero CList.
+ * Each entry's map holds list[i].len elements, as allocated by init_cfg().
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is an
+ * unsigned type, so a difference squeezed into an int would get the wrong
+ * sign whenever the two ids lie more than INT_MAX apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((TSCfgInfo*)a)->id;
+   Oid idb = ((TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached TSCfgInfo for configuration 'id', loading it with
+ * init_cfg() on first use.  The cache is kept sorted by id so later
+ * lookups can use bsearch(); the most recent hit is remembered in
+ * CList.last_cfg as a shortcut.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp )
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+   /*
+    * Do not publish the new slot through last_cfg yet: init_cfg() sets
+    * cfg->id before its later checks, so if it bails out with an error
+    * (presumably ts_error(ERROR) does not return -- confirm) the shortcut
+    * above would hand out a half-initialised entry on the next call.
+    */
+   CList.last_cfg=NULL;
+   init_cfg(id, &(CList.list[CList.len]));
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return findcfg(id); /* qsort changed order!! */
+}
+
+
+/*
+ * Translate a configuration name into its pg_ts_cfg oid.  The name->id map
+ * in CList is consulted first; on a miss the oid is fetched through SPI and
+ * added to the map.  Raises ERROR when the query fails, finds no row, or
+ * yields a null oid.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid cfgid;
+
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid )
+       return cfgid;
+
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( !plan_name2id )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !( SPI_processed > 0 ) )
+       elog(ERROR, "No tsearch config");
+   else {
+       cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull )
+           elog(ERROR, "Null id for tsearch config");
+   }
+   SPI_finish();
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Split buf (buflen bytes) into lexemes with the parser of cfg and append
+ * the normalized forms to prs->words.
+ *
+ * For every token the dictionaries mapped to its type are tried in order.
+ * The first one returning a non-NULL result wins; each string of that
+ * result becomes one WORD sharing the same position.  Token types that are
+ * outside cfg->map, or that no dictionary recognises, are skipped.  Raises
+ * ERROR for a token of MAXSTRLEN bytes or more.
+ *
+ * The strings returned by the dictionary are stored in prs->words without
+ * copying; only the array that held them is freed here.
+ */
+void
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           /* lenlemm is an int, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token to prs->words: the array is doubled until there is
+ * room, the new entry is zeroed, and a palloc'ed copy of the buflen bytes
+ * at buf is stored together with the token type.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->lenwords <= prs->curwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;
+}
+
+/*
+ * Link the most recently added word (prs->words[curwords-1]) to every VAL
+ * item of the query whose operand equals the buflen bytes at buf.  The
+ * first match is stored in the word itself; each further match gets a
+ * duplicate entry flagged as 'repeated'.
+ *
+ * Room for up to query->size duplicates is reserved first.  The pointer to
+ * the current word is taken only after that, because repalloc() may move
+ * the array.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) {
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else
+               word->item=item;
+       }
+       item++;
+   }
+}
+
+/*
+ * Parse buf (buflen bytes) for headline generation.  Every token is added
+ * to prs->words by hladdword().  Its normalized forms, taken from the first
+ * dictionary of the token type that returns a non-NULL result, are matched
+ * against the query by hlfinditem() and then freed.  Raises ERROR for a
+ * token of MAXSTRLEN bytes or more.
+ */
+void
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       /* token types outside the map have no dictionaries to consult */
+       if ( type >= cfg->len )
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           /* lenlemm is an int, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Assemble the headline text from prs->words.  Words flagged 'in' (and
+ * neither 'skip' nor 'repeated') are emitted: a 'replace' word becomes a
+ * single space, a 'selected' word is wrapped in startsel/stopsel, any other
+ * word is copied as is.  The word buffer of every non-repeated entry is
+ * freed along the way.  Returns a palloc'ed text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int     len=128;
+   int     i;
+   text    *out=(text*)palloc( len );
+   char    *ptr=((char*)out) + VARHDRSZ;
+
+   for(i=0; i<prs->curwords; i++) {
+       HLWORD  *wrd=prs->words + i;
+
+       /* make sure the word plus both markers fits into the buffer */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
+           int used = ptr - ((char*)out);
+
+           len*= 2;
+           out = (text *) repalloc(out, len);
+           ptr=((char*)out) + used;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace )
+               *ptr++ = ' ';
+           else if ( wrd->selected ) {
+               memcpy(ptr,prs->startsel,prs->startsellen);
+               ptr+=prs->startsellen;
+               memcpy(ptr,wrd->word,wrd->len);
+               ptr+=wrd->len;
+               memcpy(ptr,prs->stopsel,prs->stopsellen);
+               ptr+=prs->stopsellen;
+           } else {
+               memcpy(ptr,wrd->word,wrd->len);
+               ptr+=wrd->len;
+           }
+       }
+
+       /* repeated entries share the buffer of the entry they duplicate */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+   }
+
+   VARATT_SIZEP(out)=ptr - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the id of the current configuration.  When none has been chosen
+ * yet (current_cfg_id is 0) it is looked up in pg_ts_cfg by the LC_CTYPE
+ * locale name and remembered.  Raises ERROR if the query fails or no
+ * configuration matches the locale.
+ */
+int  
+get_currcfg(void) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1];
+   const char *locname;
+   bool isnull;
+   int rc;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   locname = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)locname) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !( SPI_processed > 0 ) )
+       elog(ERROR,"Can't find tsearch config by locale");
+   else
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+/*
+ * Make the configuration with the given oid the current one.  findcfg()
+ * is called first, so an id that cannot be loaded is reported before
+ * current_cfg_id is changed.
+ */
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+        Oid cfgid=PG_GETARG_OID(0);
+
+        findcfg(cfgid);
+        current_cfg_id=cfgid;
+        PG_RETURN_VOID();
+}
+                
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+/*
+ * Make the configuration with the given name the current one: the name is
+ * resolved with name2id_cfg() and the id is passed on to set_curcfg().
+ */
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+        Oid cfgid=name2id_cfg(name);
+
+        DirectFunctionCall1( set_curcfg, ObjectIdGetDatum(cfgid) );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+/* Return the id of the current configuration as reported by get_currcfg(). */
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   int cur=get_currcfg();
+
+   PG_RETURN_OID( cur );
+}
+
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+/*
+ * Emit the NOTICE "TSearch cache cleaned" through ts_error() and return.
+ * NOTE(review): no cache is cleared by this function itself; presumably
+ * ts_error() (defined elsewhere) performs the reset -- confirm.
+ */
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries mapped to one token type. */
+typedef struct {
+   /* number of entries in dict_id */
+   int len;
+   /* dictionary ids (stored as Datums), in the order they are tried */
+   Datum   *dict_id;
+} ListDictionary;
+
+/* One loaded configuration, as filled in by init_cfg(). */
+typedef struct {
+   /* oid of the pg_ts_cfg row */
+   Oid id;
+   /* id of the parser used by this configuration */
+   Oid prs_id;
+   /* number of entries in map (highest mapped token type + 1) */
+   int len;
+   /* dictionary lists, indexed by token type */
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalized word produced by parsetext_v2(). */
+typedef struct {
+        /* length of word in bytes */
+        uint16          len;
+   /* parsetext_v2() stores a single position in pos; apos is presumably
+    * used once several positions are collected -- confirm */
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        /* the normalized word as returned by the dictionary */
+        char       *word;
+   /* set to 0 by parsetext_v2() */
+   uint32  alen;
+}       WORD;
+   
+/* Growable array of WORDs plus the running word position. */
+typedef struct {
+        WORD       *words;
+        /* allocated size of words */
+        int4            lenwords;
+        /* number of entries of words in use */
+        int4            curwords;
+   /* position counter, advanced once per recognised lexeme */
+   int4        pos;
+}       PRSTEXT;
+
+/* One token of the text a headline is built from. */
+typedef struct {
+        /* length of word in bytes */
+        uint16    len;
+   /*
+    * Flags read by genhl(): a word is emitted when 'in' is set and neither
+    * 'skip' nor 'repeated' is; 'replace' emits a single space instead of
+    * the word; 'selected' wraps the word in startsel/stopsel.  'repeated'
+    * marks the duplicate entries created by hlfinditem().
+    */
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   /* token type reported by the parser */
+   uint8   type;
+        /* copy of the token text (not NUL-terminated; see len) */
+        char      *word;
+   /* query item this word matched, or NULL */
+   ITEM      *item;
+}       HLWORD;
+   
+/* Growable array of HLWORDs plus the selection markers. */
+typedef struct {
+        HLWORD       *words;
+        /* allocated size of words */
+        int4            lenwords;
+        /* number of entries of words in use */
+        int4            curwords;
+        /* text inserted before / after each selected word */
+        char           *startsel;
+        char            *stopsel;
+        /* lengths of startsel / stopsel in bytes */
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+/*
+ * tsstat_in - input function for the tsstat type.
+ *
+ * The argument is never examined: the function always returns a freshly
+ * allocated, empty statistics value (header only, zero entries).
+ */
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty=palloc(STATHDRSIZE);
+   empty->size=0;             /* no StatEntry records */
+   empty->len=STATHDRSIZE;    /* total size is just the header */
+   PG_RETURN_POINTER(empty);
+}
+
+/*
+ * tsstat_out - output function for the tsstat type.
+ *
+ * Not implemented: always raises an ERROR.  The PG_RETURN_NULL() after the
+ * elog() call is only there to give the function a return statement.
+ */
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * SEI_realloc - grow an array of WordEntry pointers.
+ *
+ * in   current array, or NULL if nothing has been allocated yet
+ * len  in/out: capacity of the array in elements
+ *
+ * On first use (NULL array or zero capacity) an array of 8 pointers is
+ * allocated; otherwise the capacity is doubled.  Returns the (possibly
+ * moved) array and stores the new capacity in *len.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in==NULL || *len==0 ) {
+       /* first allocation */
+       *len=8;
+       return palloc( (*len) * sizeof(WordEntry*) );
+   }
+
+   /* already allocated: double it */
+   *len = (*len) * 2;
+   return repalloc( in, (*len) * sizeof(WordEntry*) );
+}
+
+/*
+ * compareStatWord - order a statistics entry against a tsvector lexeme.
+ *
+ * a/stat  entry and the tsstat whose string area holds its text
+ * b/txt   entry and the tsvector whose string area holds its text
+ *
+ * Entries of different length are ordered by length (shorter first);
+ * entries of equal length are ordered by strncmp() over that length.
+ * Returns <0, 0 or >0.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp( STATSTRPTR(stat) + a->pos, STRPTR(txt) + b->pos, a->len );
+}
+
+/*
+ * formstat - build a new tsstat holding every entry of `stat` plus `len`
+ * new lexemes taken from `txt`.
+ *
+ * stat   existing statistics (left untouched)
+ * txt    tsvector that owns the lexemes referenced by `entry`
+ * entry  array of `len` pointers to WordEntry records inside `txt`
+ * len    number of new lexemes; must be at least 1
+ *
+ * Each new lexeme gets ndoc=1 and nentry = number of positions stored for
+ * it in `txt` (1 if it has none).  The old string area is copied first and
+ * the new lexeme strings are appended after it, so `pos` values of the old
+ * entries stay valid.
+ *
+ * Assumes (as ts_accum guarantees) that `entry` is ordered the same way as
+ * the entries of `stat`, i.e. by compareStatWord(), and that none of the
+ * new lexemes is already present in `stat`.
+ *
+ * Returns a palloc'ed tsstat; the caller owns it.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* total length of the lexeme strings being added */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings go first; new ones are appended starting at curptr */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new lexeme: binary-search its insertion point */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       /* entries before the insertion point, the new entry, then the rest */
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new lexemes: merge the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries (possibly nothing) */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /*
+        * whatever is left of the new lexemes; the old entries are then
+        * exhausted, so the memcpy above copied nothing and nptr is still
+        * the next free slot
+        */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+/*
+ * ts_accum - fold one tsvector into accumulated word statistics.
+ *
+ * Argument 0 is the running tsstat (created empty here if it is NULL),
+ * argument 1 the tsvector to add.
+ *
+ * For every lexeme of the tsvector that already has an entry, that entry
+ * is updated IN PLACE: ndoc is incremented and nentry grows by the number
+ * of stored positions (1 if there are none).  Lexemes without an entry are
+ * collected and, if there are any, a new tsstat containing them is built
+ * with formstat().
+ *
+ * Returns the input tsstat when nothing new had to be added, otherwise the
+ * newly built one; the old value is not freed here (see the commented-out
+ * pfree at the end).
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both arrays in parallel when the statistics are
+    * less than 100 times larger than the tsvector, otherwise binary-search
+    * the statistics for each lexeme of the tsvector.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               /* known lexeme: update its counters in place */
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* lexeme sorts before the current entry: it is new */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* statistics exhausted: every remaining lexeme is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* the loop ends with StopLow < StopHigh only via the break above */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-call state of the set-returning functions in this file, kept in
+ * FuncCallContext.user_fctx between calls.
+ *
+ * The stat field holds a copy of a tsstat (see ts_setup_firstcall), so it
+ * is declared with that type rather than as a tsvector pointer.
+ */
+typedef struct {
+   uint32  cur;    /* index of the next entry of stat to return */
+   tsstat  *stat;  /* private copy of the statistics being returned */
+} StatStorage;
+
+/*
+ * ts_setup_firstcall - first-call initialisation shared by the
+ * set-returning functions.
+ *
+ * Working in the multi-call memory context, it stores a StatStorage with a
+ * private copy of `stat` in funcctx->user_fctx and prepares the tuple slot
+ * and input metadata for the row type looked up by the name "statinfo".
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext   saved;
+   StatStorage     *storage;
+   TupleDesc       rowdesc;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* iteration state plus a copy of the statistics that outlives this call */
+   storage=palloc( sizeof(StatStorage) );
+   storage->stat=palloc( stat->len );
+   memcpy(storage->stat, stat, stat->len);
+   storage->cur=0;
+   funcctx->user_fctx = (void*)storage;
+
+   /* description of the rows to be returned */
+   rowdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+
+/*
+ * ts_process_call - produce the next result row of a statistics scan.
+ *
+ * Reads the StatStorage kept in funcctx->user_fctx.  While entries remain,
+ * builds a three-column tuple (lexeme text, ndoc, nentry) for the current
+ * entry, advances the cursor and returns the tuple as a Datum.  Once all
+ * entries have been returned, frees the stored state and returns (Datum)0.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *storage=(StatStorage*)funcctx->user_fctx;
+   StatEntry   *item;
+   HeapTuple   row;
+   Datum       out;
+   char    *cols[3];
+   char    ndocbuf[16];
+   char    nentrybuf[16];
+
+   /* nothing left: release the private copy and signal the end */
+   if ( !(storage->cur < storage->stat->size) ) {
+       pfree(storage->stat);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   item=STATPTR(storage->stat) + storage->cur;
+
+   /* column 0: NUL-terminated copy of the lexeme */
+   cols[0]=palloc( item->len+1 );
+   memcpy( cols[0], STATSTRPTR(storage->stat)+item->pos, item->len);
+   cols[0][item->len]='\0';
+
+   /* columns 1 and 2: the counters rendered as text */
+   sprintf(ndocbuf,"%d",item->ndoc);
+   cols[1]=ndocbuf;
+   sprintf(nentrybuf,"%d",item->nentry);
+   cols[2]=nentrybuf;
+
+   row = BuildTupleFromCStrings(funcctx->attinmeta, cols);
+   out = TupleGetDatum(funcctx->slot, row);
+
+   pfree(cols[0]);
+   storage->cur++;
+   return out;
+}
+
+/*
+ * ts_accum_finish - set-returning function that turns a tsstat (argument 0)
+ * into rows, one per call, using ts_setup_firstcall() on the first call
+ * and ts_process_call() on every call.
+ */
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+Datum
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *input=(tsstat*)PG_GETARG_POINTER(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, input);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row=ts_process_call(funcctx);
+   if ( row != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, row);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Oid of the tsvector type, looked up once by get_ti_Oid() and cached. */
+static Oid tiOid=InvalidOid;
+
+/*
+ * get_ti_Oid - look up the Oid of the type named 'tsvector' in pg_type via
+ * SPI and store it in tiOid.
+ *
+ * Raises an ERROR if the query fails, returns no row, or yields InvalidOid.
+ * Must be called with an SPI connection already open (SPI_exec is used
+ * directly).
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is an unsigned row count, so the row must be tested for
+    * with "< 1"; a "< 0" test can never fire and would let vals[0] of an
+    * empty result be read below.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * ts_stat_sql - run the SQL query given in `txt` and accumulate word
+ * statistics over its result.
+ *
+ * The query must return exactly one column of type tsvector; otherwise an
+ * ERROR is raised.  Rows are fetched through an SPI cursor in batches of
+ * 100 and each non-NULL value is folded into the statistics with
+ * ts_accum().
+ *
+ * Must be called with an SPI connection already open.  Returns the
+ * accumulated tsstat (an empty one if the query returned no rows).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   /* the first batch also provides the tuple descriptor checked below */
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from empty statistics */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       /* fold every row of the current batch into the statistics */
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum returns its input when no new word was added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+/*
+ * ts_stat - set-returning function: argument 0 is the text of a query.
+ *
+ * On the first call the query is run through ts_stat_sql() inside an SPI
+ * connection and the resulting statistics are handed to
+ * ts_setup_firstcall(); every call then returns one row produced by
+ * ts_process_call() until the statistics are exhausted.
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+Datum
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *sql=PG_GETARG_TEXT_P(0);
+       tsstat  *collected;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       collected = ts_stat_sql(sql);
+       PG_FREE_IF_COPY(sql,0);
+       /* copy the result into the multi-call context before SPI_finish() */
+       ts_setup_firstcall(funcctx, collected);
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row=ts_process_call(funcctx);
+   if ( row != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, row);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one lexeme, stored in the entry array of a tsstat.
+ * As used in ts_stat.c: len is the length of the lexeme text, pos its
+ * offset from the start of the string area (STATSTRPTR), ndoc is
+ * incremented once for each tsvector containing the lexeme, and nentry
+ * grows by the number of positions that tsvector stores for it (1 if none).
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Accumulated word statistics.  As used in ts_stat.c: len is the total
+ * size of the value in bytes, size the number of StatEntry records; data
+ * holds the StatEntry array immediately followed by the lexeme strings
+ * (see STATPTR and STATSTRPTR).
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat: header, then `size` StatEntry records, then
+ * the lexeme strings.
+ *
+ *  STATHDRSIZE          size of the len/size header
+ *  CALCSTATSIZE(x, l)   total bytes for x entries and l bytes of strings
+ *  STATPTR(x)           first StatEntry of the tsstat at x
+ *  STATSTRPTR(x)        start of the string area of the tsstat at x
+ *  STATSTRSIZE(x)       length in bytes of that string area
+ *
+ * Every macro argument is parenthesized so expressions can be passed
+ * safely, and the header is read through tsstat (declared above) instead
+ * of tsvector, which this header does not declare.
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+-- One row per dictionary.  The rows inserted below fill dict_init and
+-- dict_lexize with the pg_proc oids of the dictionary's init and lexize
+-- functions, and dict_initoption with either null or a text option such
+-- as a stop-word file path ('DATA_PATH/english.stop').
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+-- One row per parser.  The 'default' row inserted below fills the oid
+-- columns with the pg_proc oids of the prsd_start, prsd_getlexeme,
+-- prsd_end, prsd_headline and prsd_lextype functions respectively.
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+-- One row per configuration: its name, the name of the parser it uses
+-- (the rows inserted below all use 'default') and an optional locale
+-- string such as 'C' or 'ru_RU.KOI8-R'.
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+-- For each configuration and token alias (lword, email, url, ...), the
+-- list of dictionary names to apply, e.g. {en_stem} or {simple}.
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of position lexem in string and it's length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>           /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending position.
+ * The weight is not part of the ordering.  Equal positions compare as 0:
+ * qsort() requires a consistent comparison function, and returning a
+ * non-zero value for equal keys (in both argument orders) breaks that
+ * contract.
+ */
+static int
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] with comparePos() and squeeze out repeated
+ * positions in place.  When a position occurs more than once the largest
+ * weight seen for it is kept.  Compaction stops early once MAXNUMPOS
+ * items have been kept or a kept position equals MAXENTRYPOS-1.
+ * Returns the number of items left at the front of a[].
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   int4    kept = 0;   /* index of the last item kept so far */
+   int4    i;
+
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (i = 1; i < l; i++) {
+       if ( a[i].pos == a[kept].pos ) {
+           /* same position again: remember the heavier weight */
+           if ( a[i].weight > a[kept].weight )
+               a[kept].weight = a[i].weight;
+           continue;
+       }
+
+       kept++;
+       a[kept].pos = a[i].pos;
+       a[kept].weight = a[i].weight;
+       if ( kept >= MAXNUMPOS-1 || a[kept].pos == MAXENTRYPOS-1 )
+           break;
+   }
+
+   return kept + 1;
+}
+
+/* Lexeme text that compareentry() indexes into; uniqueentry() sets it before sorting. */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN items: shorter lexemes sort first,
+ * lexemes of equal length are ordered by strncmp() over their bytes in
+ * BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *ea = (const WordEntryIN *) a;
+   const WordEntryIN *eb = (const WordEntryIN *) b;
+
+   if ( ea->entry.len != eb->entry.len )
+       return ( ea->entry.len > eb->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[ea->entry.pos],
+                  &BufferStr[eb->entry.pos],
+                  ea->entry.len);
+}
+
+/*
+ * Sort the l parsed entries in a[] and merge entries carrying the same
+ * lexeme, in place.
+ *
+ *  a          entries; entry.pos/entry.len address the lexeme inside buf.
+ *             When entry.haspos is set, pos[0] (read as a uint16) holds
+ *             the number of positions and pos[1..] the positions.
+ *  l          number of entries, must be >= 1
+ *  buf        buffer holding the lexeme text
+ *  outbuflen  out: exact number of bytes the caller needs to lay out all
+ *             surviving entries, i.e. for each entry SHORTALIGN(len) plus,
+ *             when it has positions, (count + 1) * sizeof(WordEntryPos)
+ *             (the extra slot is the uint16 count written before them).
+ *
+ * The position lists of merged duplicates are concatenated and then
+ * de-duplicated with uniquePos().  Returns the number of unique entries.
+ *
+ * The size is computed from zero here rather than added onto whatever the
+ * caller passed in, and it includes the count slot for every entry; the
+ * earlier accounting left that slot out on the multi-entry path and so
+ * could report less space than tsvector_in() actually writes.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+   int4        need = 0;
+
+   res = a;
+   if (l == 1) {
+       need = SHORTALIGN(a->entry.len);
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           need += (*(uint16*)(a->pos) + 1) * sizeof(WordEntryPos);
+       }
+       *outbuflen = need;
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* *res is complete: account for it, then start the next entry */
+           need += SHORTALIGN(res->entry.len);
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               need += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           res++;
+           if ( res != ptr )   /* memcpy() must not be given identical regions */
+               memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* duplicate lexeme: fold its positions into *res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* *res had no positions yet: simply adopt the duplicate's list */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* account for the last entry */
+   need += SHORTALIGN(res->entry.len);
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       need += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+
+   *outbuflen = need;
+   return res + 1 - a;
+}
+
+/*
+ * Tokenizer states used by gettoken_tsvector():
+ *   WAITWORD      skipping spaces before a token
+ *   WAITENDWORD   inside an unquoted word
+ *   WAITNEXTCHAR  a backslash was seen; the next character is taken as is
+ *   WAITENDCMPLX  inside a single-quoted word
+ *   WAITPOSINFO   just after the closing quote; ':' starts position info
+ *   INPOSINFO     a digit starting a position is expected
+ *   WAITPOSDELIM  after the start of a position: remaining digits, a
+ *                 weight letter, ',' or the end of the token
+ */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Doubles the token buffer state->word (and re-points state->curpos into
+ * the reallocated buffer) when curpos - word + 1 >= len.  Expects a
+ * TI_IN_STATE pointer named "state" to be in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Read the next token from state->prsbuf.
+ *
+ * Returns 1 when a token was read: its NUL-terminated text is in
+ * state->word and state->curpos points at the terminator.  Returns 0 when
+ * the end of the input is reached while waiting for a token.  Syntax
+ * problems are reported with elog(ERROR).
+ *
+ * A token is either an unquoted word or a single-quoted string; a
+ * backslash takes the following character literally in both.  Unless
+ * state->oprisdelim is set, a ':' after the word introduces a
+ * comma-separated list of positions, each optionally followed by a weight
+ * (a/A or '*', b, c, d).  If positions were read, state->alen is non-zero
+ * and state->pos holds them: slot 0 is used as a uint16 counter, the
+ * positions follow from slot 1.  The array is palloc'd here and handed
+ * over to the caller.
+ *
+ * When state->oprisdelim is set, characters matching ISOPERATOR() end a
+ * word and ':' ends the token without position parsing.
+ *
+ * Characters are converted to unsigned char before being passed to the
+ * <ctype.h> classification functions; passing a negative plain char
+ * (any 8-bit character where char is signed) is undefined behaviour.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;    /* back to the state the '\\' interrupted */
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               /* the delimiter is not consumed; the next call sees it again */
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               /* make room for one more position (slot 0 is the counter) */
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi() reads the whole number; its other digits are skipped in WAITPOSDELIM */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           unsigned char ch = (unsigned char) *(state->prsbuf);
+
+           if ( ch == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower(ch) == 'a' || ch == '*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower(ch) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower(ch) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower(ch) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace(ch) || ch == '\0' ) {
+               return 1;
+           } else if ( !isdigit(ch) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type: parse the textual form (cstring
+ * argument 0) into the binary representation.
+ *
+ * Tokens are collected with gettoken_tsvector() into a temporary text
+ * buffer and an array of WordEntryIN, duplicates are merged by
+ * uniqueentry(), and the result is laid out as the WordEntry array
+ * followed by the data area: each lexeme, padded with SHORTALIGN, then,
+ * if it has positions, a uint16 count and the positions themselves.
+ *
+ * Raises ERROR for a lexeme of MAXSTRLEN bytes or more and for any lexeme
+ * offset that does not fit the pos field of WordEntry (< MAXSTRPOS).
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /* the pos bit-field holds offsets 0 .. MAXSTRPOS-1 only */
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;   /* takes over the array palloc'd by the tokenizer */
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* empty vector: no data area, not the scratch buffer size */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /*
+        * Offsets in the final layout also include alignment padding and
+        * position data, so they must be re-checked against the field width.
+        */
+       if (cur - STRPTR(in) >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* slot 0 of arr[i].pos is the count, so count + 1 slots are copied */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * tsvector_length(tsvector): number of entries stored in the vector
+ * (its size field), returned as int4.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec;
+   int4        nentries;
+
+   vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   nentries = vec->size;
+
+   /* read the count before a detoasted copy is released */
+   PG_FREE_IF_COPY(vec, 0);
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type: render the vector as text.
+ *
+ * Every lexeme is written in single quotes, entries are separated by one
+ * space.  Quote and backslash characters inside a lexeme are prefixed
+ * with a backslash; both are escape-sensitive in tsvector_in(), so both
+ * must be escaped for the output to read back as the same value.
+ * Position data follows as ":pos[weight],pos[weight],..." where weight is
+ * A, B or C for weights 3, 2, 1 and is omitted for weight 0.
+ *
+ * The buffer is sized for the worst case up front: two bytes per lexeme
+ * byte (every byte escaped), two quotes per entry, the separating spaces,
+ * and seven bytes per position (up to five digits, a weight letter and a
+ * comma; the comma not needed after the last position pays for the ':').
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += ptr[i].len*2 /*for escape */;
+       if ( ptr[i].haspos )
+           lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           /* room for the escape is already reserved (len*2 above) */
+           if (*curin == '\'' || *curin == '\\')
+               *curout++ = '\\';
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD items: by length first, then by the bytes
+ * of the word, then by position.  Items equal in all three compare as 0
+ * so that the ordering stays consistent, as qsort() requires.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+
+   if (wa->len == wb->len) {
+       int res = strncmp(wa->word, wb->word, wb->len);
+
+       if ( res==0 ) {
+           if ( wa->pos.pos == wb->pos.pos )
+               return 0;
+           return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+       }
+       return res;
+   }
+   return (wa->len > wb->len) ? 1 : -1;
+}
+
+/*
+ * Sort the l parsed words with compareWORD() and collapse repeated words
+ * in place.  Each surviving word gets a palloc'd position array in
+ * pos.apos: element 0 is the number of positions, the positions (limited
+ * by LIMITPOS) follow.  The text of a dropped duplicate is pfree'd.  No
+ * further position is added to a word once it has MAXNUMPOS-1 of them or
+ * its last position equals MAXENTRYPOS-1.
+ * Returns the number of unique words.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *last = a;   /* most recent unique word */
+   WORD       *scan;
+   int         firstpos;
+
+   if (l > 1)
+       qsort((void *) a, l, sizeof(WORD), compareWORD);
+
+   /* start the position list of the first word; read pos.pos before pos.apos is set */
+   firstpos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=firstpos;
+
+   for (scan = a + 1; scan - a < l; scan++)
+   {
+       int     sameword = (scan->len == last->len &&
+                           strncmp(scan->word, last->word, last->len) == 0);
+
+       if ( sameword ) {
+           /* repeated word: only its position is of interest */
+           pfree(scan->word);
+           if ( !(last->pos.apos[0] < MAXNUMPOS-1 && last->pos.apos[ last->pos.apos[0] ] != MAXENTRYPOS-1) )
+               continue;
+           if ( last->pos.apos[0]+1 >= last->alen ) {
+               last->alen*=2;
+               last->pos.apos=(uint16*)repalloc( last->pos.apos, sizeof(uint16)*last->alen );
+           }
+           last->pos.apos[ last->pos.apos[0]+1 ] = LIMITPOS(scan->pos.pos);
+           last->pos.apos[0]++; 
+       } else {
+           /* new word: move it down and start its position list */
+           last++;
+           last->len = scan->len;
+           last->word = scan->word;
+           firstpos=LIMITPOS(scan->pos.pos);
+           last->alen=2;
+           last->pos.apos=(uint16*)palloc( sizeof(uint16)*last->alen );
+           last->pos.apos[0]=1;
+           last->pos.apos[1]=firstpos;
+       }
+   }
+
+   return last + 1 - a;
+}
+
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are de-duplicated with uniqueWORD(), then laid out as the
+ * WordEntry array followed by the data area: each lexeme padded with
+ * SHORTALIGN and, when it has positions, a uint16 count followed by the
+ * positions (all written with weight 0).
+ *
+ * prs->words is consumed: every word buffer, every position array and
+ * the array itself are pfree'd.  Raises ERROR when a lexeme offset does
+ * not fit the pos field of WordEntry (valid offsets are 0..MAXSTRPOS-1).
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /* an offset of exactly MAXSTRPOS would wrap to 0 in the bit-field */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id int4, text): parse the text with the configuration
+ * found by findcfg() and return the resulting tsvector.  Text that yields
+ * no words gives a vector with size 0 and no data area.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *in = PG_GETARG_TEXT_P(1);
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     prs;
+   tsvector   *out;
+
+   prs.pos = 0;
+   prs.curwords = 0;
+   prs.lenwords = 32;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+   PG_FREE_IF_COPY(in, 1);
+
+   if (prs.curwords == 0)
+   {
+       /* nothing was indexed: hand back a header-only vector */
+       pfree(prs.words);
+       out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       PG_RETURN_POINTER(out);
+   }
+
+   out = makevalue(&prs);      /* releases prs.words */
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * to_tsvector(cfg_name text, text): translate the configuration name to
+ * its id with name2id_cfg() and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid;
+   Datum       res;
+
+   cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   res = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsvector(text): delegate to to_tsvector() using the configuration id
+ * reported by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       res;
+
+   res = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * text.  Returns the oid of the first such candidate delivered by
+ * FuncnameGetCandidates(), or InvalidOid if there is none.  The candidate
+ * list is pfree'd node by node.
+ */
+static Oid
+findFunc(char *fname) {
+   List       *names = makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(names, 1);
+   Oid         found = InvalidOid;
+
+   freeList(names);
+
+   while ( cand != NULL ) {
+       FuncCandidateList next = cand->next;
+
+       /* keep only the first match, but still walk on to free every node */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * Row-level BEFORE INSERT/UPDATE trigger function that recomputes a
+ * tsvector column from other columns of the same row.
+ *
+ * Trigger arguments: tgargs[0] names the tsvector column to fill; every
+ * following argument names either a column of the relation or, when no
+ * such column exists, a one-argument function taking text (see findFunc).
+ * Once a function name has been seen its oid stays in funcoid for the rest
+ * of the argument list, so it is applied to the value of every column
+ * named after it.
+ *
+ * Columns that are not of type text, varchar or bpchar are skipped with a
+ * WARNING; NULL values are skipped silently.  The words are parsed with
+ * the configuration returned by findcfg(get_currcfg()).
+ *
+ * Returns the modified tuple; raises ERROR when not called as a suitable
+ * trigger, when an argument names neither a column nor a function, or
+ * when SPI_modifytuple() fails.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* the tuple to modify is the new row version in both cases */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as the name of a text function */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* pass the column value through the function named earlier */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * If detoasting made no copy, make txt_toasted equal to txt so
+            * that the pfree() test below does not fire for it.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when it differs from the pointer in txt_toasted */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       /* makevalue() also releases prs.words */
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a header-only tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Directory entry for one lexeme of a tsvector, packed into 32 bits.
+ *   haspos  set when position data follows the lexeme in the data area
+ *   len     length of the lexeme in bytes
+ *   pos     offset of the lexeme from the start of the data area (STRPTR)
+ * MAXSTRLEN and MAXSTRPOS are one more than the largest value the len and
+ * pos fields can hold.
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme, packed into 16 bits: a 2-bit weight and a
+ * 14-bit position.
+ *   MAXENTRYPOS  one more than the largest position the pos field can hold
+ *   MAXNUMPOS    limit on the number of positions kept per lexeme
+ *   LIMITPOS(x)  clamps x to MAXENTRYPOS-1; note that x is evaluated twice
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * In-memory/on-disk form of a tsvector value: a fixed header followed by
+ * `size` WordEntry items and then the data area (lexemes and positions).
+ * NOTE(review): len being the first int4 presumably doubles as the
+ * varlena length word -- confirm.
+ */
+typedef struct
+{
+   int4        len;        /* total size of the value in bytes */
+   int4        size;       /* number of WordEntry items */
+   char        data[1];    /* start of the variable-length part */
+}  tsvector;
+
+/*
+ * Layout helpers for a tsvector value.  All macro arguments are
+ * parenthesized so that expressions such as "cur - data" or "p + 1" can
+ * be passed safely.
+ *
+ *   DATAHDRSIZE         size of the fixed header (len and size fields)
+ *   CALCDATASIZE(x,l)   total size for x entries and l bytes of data area
+ *   ARRPTR(x)           start of the WordEntry array
+ *   STRPTR(x)           start of the data area, right after the array
+ *   STRSIZE(x)          size of the data area in bytes
+ *   _POSDATAPTR(x,e)    address just past entry e's lexeme, aligned with
+ *                       SHORTALIGN: the uint16 position count
+ *   POSDATALEN(x,e)     number of positions of entry e (0 if haspos is unset)
+ *   POSDATAPTR(x,e)     first WordEntryPos of entry e
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Working form of an entry while a tsvector is being parsed from text:
+ * the WordEntry plus a separately allocated position array.  In that
+ * array slot 0 is used as a uint16 count and the positions start at
+ * slot 1.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * State of the tokenizer gettoken_tsvector().
+ */
+typedef struct
+{
+   char       *prsbuf;     /* input text, advanced as it is consumed */
+   char       *word;       /* buffer receiving the current token */
+   char       *curpos;     /* next write position inside word */
+   int4        len;        /* allocated size of word */
+   int4        state;      /* current tokenizer state */
+   int4        alen;       /* allocated slots in pos; 0 if the token has no positions */
+   WordEntryPos    *pos;   /* positions of the token: slot 0 is the count */
+   bool        oprisdelim; /* operator characters end a word; no position parsing */
+}  TI_IN_STATE;
+
+/* Reads the next token into *state; returns 1 on a token, 0 at end of input. */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>           /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the vector that keeps every lexeme
+ * but carries no position information (haspos is cleared on all entries
+ * and the data area holds only the SHORTALIGN-padded lexemes).
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* size of the data area once positions are gone */
+   for(i=0;i<in->size;i++) 
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char"): return a copy of the vector in which every
+ * position carries the given weight.  The weight letter is
+ * case-insensitive: a=3, b=2, c=1, d=0; anything else raises ERROR.
+ * Entries without positions are left as they are.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* tolower() is only defined for values representable as unsigned char */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;    
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two entries that may belong to different vectors; ptra and ptrb
+ * are the data areas the entries' pos offsets refer to.  Shorter lexemes
+ * sort first, lexemes of equal length are ordered by strncmp().
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of entry srcptr (in vector src) to the position
+ * list of entry destptr (in vector dest), adding maxpos to each position
+ * and clamping it with LIMITPOS.  Weights are copied unchanged.
+ *
+ * If destptr has no positions yet its count is first set to 0.  Copying
+ * stops when the destination holds MAXNUMPOS positions or its last
+ * position is already MAXENTRYPOS-1.  destptr->haspos is set when at
+ * least one position was added.
+ *
+ * Returns the number of positions added.  The caller must have reserved
+ * room in dest's data area behind destptr's lexeme.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector): merge two vectors into a new one.
+ *
+ * Both inputs are walked in their stored entry order and merged with
+ * compareEntry().  Positions coming from the second vector are shifted by
+ * the largest position found in the first one (see add_pos()); a lexeme
+ * present in both vectors gets the positions of the first followed by
+ * the shifted positions of the second.
+ *
+ * The result is allocated with the summed size of both inputs, which
+ * bounds what is needed, and is zero-filled; the real entry count and
+ * length are set at the end and the data area is then moved down next to
+ * the shortened entry array.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   /* provisional size: STRPTR(out) and the POSDATA macros depend on it */
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* positions of in1 are copied verbatim, count included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* the count in out starts at 0 thanks to the memset above */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both vectors: emit it once, merge positions */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count slot is already there, only the added positions take space */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* whatever is left of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* whatever is left of in2, with shifted positions */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /* fix up the header, then close the gap left by merged entries */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- Uninstall script for the tsearch2 objects named below.
+-- Everything runs inside one transaction (BEGIN ... END).
+--
+-- BE CAREFUL!
+-- This script drops all indices, triggers and columns that use the types
+-- defined in tsearch2.sql: the DROP ... CASCADE statements below remove
+-- dependent objects as well.
+
+
+-- GiST operator class
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregate
+DROP AGGREGATE stat(tsvector);
+
+-- tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types, together with everything that depends on them
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- functions
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type.  The array is indexed by
+ * the token-type numbers #defined in deflex.h (1 .. LASTNUM); entry 0 is an
+ * empty placeholder so that the numbering can start at 1.
+ * Keep the order in step with deflex.h and with tok_alias[].
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token-type numbers
+ * #defined in deflex.h (1 .. LASTNUM); entry 0 is an empty placeholder.
+ * Keep the order in step with deflex.h and with lex_descr[].
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Remember: LASTNUM must always equal the highest token-type number
+ * defined below; update it whenever a token type is added or removed.
+ */
+#define LASTNUM        23
+
+/*
+ * Token types returned by the scanner.  The numbers are also used as
+ * indexes into lex_descr[] and tok_alias[], so those arrays must list
+ * their entries in the same order.
+ */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* per-token-type description and alias tables, indexed by the numbers above */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+#include <stdio.h>     /* FILE, needed by start_parse_fh() */
+
+/*
+ * Text of the token most recently returned by tsearch2_yylex().
+ * Declared extern: the definition (with initializer) lives in parser.l,
+ * so the header must not define the variable again in every includer.
+ */
+extern char *token;
+/*
+ * Length of that token, as set by the scanner rules.
+ * NOTE(review): left as a tentative definition on purpose -- no visible
+ * file defines tokenlen with an initializer, so turning this into an
+ * extern declaration needs a matching definition added to parser.l first.
+ */
+int            tokenlen;
+
+/* scan the next token; returns its type (see deflex.h) */
+int            tsearch2_yylex(void);
+/* start scanning a string: (buffer, number of bytes) */
+void       start_parse_str(char *, int);
+/* start scanning a file handle: (handle, read limit; 0 = unlimited) */
+void       start_parse_fh(FILE *, int);
+/* release the scanner state set up by start_parse_*() */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: every three-argument fprintf()
+ * in the generated scanner is turned into ts_error(ERROR, fmt, msg).
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* use the postgres allocation functions inside the generated scanner */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the current token */
+char *s     = NULL;  /* strdup'ed copy used to return a WHOLE hyphenated word (or URL) */
+
+YY_BUFFER_STATE buf = NULL; /* buffer being scanned; needed for parsing from a string */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next fread() request, see YY_INPUT */
+
+/*
+ * Redefine YY_INPUT so that reads from a filehandle can be length-limited.
+ * The interactive branch reads one line with getc().  The other branch
+ * reads with fread(): when lrlimit > 0 at most lrlimit further bytes are
+ * requested in total, and once lrlimit has dropped to 0 end of input
+ * (YY_NULL) is reported.
+ * NOTE: the fread() test is indented as if it belonged to the preceding
+ * "else", but it is a separate statement and runs in both cases.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/*
+ * Character classes.  Every byte in the range \200-\377 (high bit set) is
+ * treated as a cyrillic letter; the text is assumed to be koi8-encoded.
+ */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+/* one or more dot-terminated labels followed by a purely alphabetic last label */
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+/* run of characters accepted after a host name as path / query part */
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/* clean up after parsing: drop the saved token copy and the scan buffer */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Start parsing the first `limit` bytes of `str`.
+ * A scan that is still open is closed with end_parse() first.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL )
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+/*
+ * Start parsing from filehandle `fh`, reading at most `limit` bytes.
+ * A scan that is still open is closed with end_parse() first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL )
+       end_parse();
+
+   /* limit == 0 means "no limit", which is stored as -1 in lrlimit */
+   if ( limit != 0 )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Load the description of parser `id` from pg_ts_parser into *prs.
+ *
+ * *prs is zeroed first.  The row with oid = id is fetched through SPI (the
+ * prepared plan is saved in plan_getparser and reused by later calls).
+ * The prs_start, prs_nexttoken, prs_end and prs_headline functions are
+ * looked up with fmgr_info_cxt() in TopMemoryContext; prs_lextype is
+ * stored as a plain Oid.  Column NULL flags are not examined.
+ *
+ * Reports an error through ts_error(ERROR, ...) when the plan cannot be
+ * prepared, the query fails, or no parser with that id exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       /* only the first (and, given the oid filter, only expected) row is used */
+       HeapTuple row = SPI_tuptable->vals[0];
+       TupleDesc rowdesc = SPI_tuptable->tupdesc;
+       Oid oid;
+
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(row, rowdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       ts_error(ERROR, "No parser with id %u", id);   /* %u: Oid is an unsigned type */
+   SPI_finish();
+}
+
+/*
+ * Cache of the parsers already loaded by findprs(), plus the
+ * parser-name -> id map filled by name2id_prs().
+ */
+typedef struct {
+   /* entry returned by the most recent lookup (may be NULL) */
+   WParserInfo *last_prs;
+   /* number of entries of list[] in use */
+   int     len;
+   /* number of entries list[] has room for */
+   int     reallen;
+   /* loaded parsers, kept sorted by prs_id so that bsearch() can be used */
+   WParserInfo *list;
+   /* cache of parser name -> parser id */
+   SNMap       name2id_map;
+} PrsList;
+
+/* the one cache instance used by this file; starts out empty */
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget every cached parser: release the name map and the parser
+ * array, then return PList to its all-zero initial state.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &PList.name2id_map );
+
+   if ( PList.list != NULL )
+       free( PList.list );
+
+   memset( &PList, 0, sizeof(PrsList) );
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is an
+ * unsigned type, so the difference converted to int can carry the wrong
+ * sign when the two ids are far apart.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached WParserInfo of parser `id`, loading it with
+ * init_prs() the first time the id is requested.
+ *
+ * Lookup order: the most recently returned entry, then a binary search
+ * of the sorted cache, and finally a load from the catalog.  The returned
+ * pointer points into PList.list and can be invalidated by a later call
+ * that grows or re-sorts the array.
+ *
+ * Errors (out of memory, unknown parser) are reported via ts_error(ERROR).
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo key;
+   WParserInfo *slot;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   key.prs_id=id;  /* compareprs() looks at prs_id only */
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /*
+    * last chance: load the parser.  The "last used" shortcut is cleared
+    * first so that it never refers to a half-initialised slot should
+    * init_prs() report an error (assumes ts_error(ERROR) does not return).
+    */
+   PList.last_prs = NULL;
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+   slot=&(PList.list[PList.len]);
+   init_prs(id, slot);
+   PList.len++;
+
+   /* qsort changes the order, so the new entry has to be located again */
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return PList.last_prs;
+}
+
+/* saved SPI plan for the name -> oid query below; prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * The name is looked up in PList.name2id_map first; on a miss the catalog
+ * is queried through SPI and the result is added to the map.
+ * Reports an error through ts_error(ERROR, ...) if the plan cannot be
+ * prepared, the query fails, or no parser has that name.
+ */
+Oid
+name2id_prs(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   Oid id=findSNMap_t( &(PList.name2id_map), name );
+   
+   /* a non-zero id is taken as a cache hit */
+   if ( id ) 
+       return id;
+   
+
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 )
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   SPI_finish();
+   /* remember the answer for later calls */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* per-query state of the token_type*() set-returning functions */
+typedef struct {
+   /* index of the next list[] entry to return */
+   int     cur;
+   /* array produced by the parser's lextype function; ends at the entry with lexid == 0 */
+   LexDescr    *list;
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type*() functions.
+ *
+ * Looks up parser `prsid`, calls its lextype function to obtain the list
+ * of token types and stores that list, together with a cursor, in
+ * funcctx->user_fctx.  The result tuple layout is taken from the
+ * relation named "tokentype".  The state is allocated while
+ * funcctx->multi_call_memory_ctx is the current memory context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   TypeStorage     *st;
+   WParserInfo *prs = findprs(prsid); 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->cur=0;
+   st->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
+   );
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the token_type*() functions.
+ *
+ * Returns the next (lexid, alias, descr) row as a tuple Datum and
+ * advances the cursor.  When the terminating entry (lexid == 0) is
+ * reached, or there is no list, the state is freed and (Datum)0 is
+ * returned to tell the caller that the set is exhausted.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage     *st;
+
+   st=(TypeStorage*)funcctx->user_fctx;
+   if (  st->list && st->list[st->cur].lexid ) {
+       Datum result;
+       char* values[3];
+       char    txtid[16];
+       HeapTuple    tuple;
+
+       /* the tuple is built from C strings, so the id is formatted as text */
+       values[0]=txtid;
+       sprintf(txtid,"%d",st->list[st->cur].lexid);
+       values[1]=st->list[st->cur].alias;
+       values[2]=st->list[st->cur].descr;
+
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       /* this entry's strings are not needed again once the tuple is built */
+       pfree(values[1]);
+       pfree(values[2]);
+       st->cur++;
+       return result;
+   } else {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+   }
+   return (Datum)0;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: one row per token type of the parser whose
+ * id is passed as argument 0.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   /* the first call builds the per-query state */
+   if ( SRF_IS_FIRSTCALL() ) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall( funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call( funcctx );
+
+   /* process_call() hands back (Datum)0 once every row was returned */
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: one row per token type of the parser whose
+ * name is passed as argument 0.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   /* the first call resolves the name and builds the per-query state */
+   if ( SRF_IS_FIRSTCALL() ) {
+       text *prsname = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall( funcctx, name2id_prs( prsname ) );
+       PG_FREE_IF_COPY(prsname, 0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call( funcctx );
+
+   /* process_call() hands back (Datum)0 once every row was returned */
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: one row per token type of the current parser.
+ * If no current parser has been chosen yet, the one named "default"
+ * becomes current.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if ( SRF_IS_FIRSTCALL() ) {
+       funcctx = SRF_FIRSTCALL_INIT();
+
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       setup_firstcall( funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call( funcctx );
+
+   /* process_call() hands back (Datum)0 once every row was returned */
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+
+/* Make the parser whose id is passed as argument 0 the current parser. */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid prsid = PG_GETARG_OID(0);
+
+   /* look the parser up (loading it if needed) before recording the id */
+   findprs( prsid );
+   current_parser_id = prsid;
+
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+
+/* Make the parser whose name is passed as argument 0 the current parser. */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+   text *prsname = PG_GETARG_TEXT_P(0);
+   Datum prsid = ObjectIdGetDatum( name2id_prs(prsname) );
+
+   /* the by-id variant does the actual work */
+   DirectFunctionCall1( set_curprs, prsid );
+
+   PG_FREE_IF_COPY(prsname, 0);
+   PG_RETURN_VOID();
+}
+
+/* one token produced by a parser run */
+typedef struct {
+   /* token type as returned by the parser's getlexeme function */
+   int type;
+   /* NUL-terminated copy of the token text */
+   char    *lexem;
+} LexemEntry;
+
+/* per-query state of the parse*() set-returning functions */
+typedef struct {
+   /* index of the next list[] entry to return */
+   int cur;
+   /* number of entries in list[] once parsing has finished */
+   int len;
+   /* tokens collected by prs_setup_firstcall() */
+   LexemEntry  *list;
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse*() functions.
+ *
+ * Runs parser `prsid` over the whole of `txt` straight away: the parser's
+ * start function is called with the text data and its length, its
+ * getlexeme function is called until it returns token type 0, and its end
+ * function is called afterwards.  Every token is copied (NUL-terminated)
+ * into a PrsStorage list stored in funcctx->user_fctx; the list is
+ * allocated while funcctx->multi_call_memory_ctx is current.  The result
+ * tuple layout is taken from the relation named "tokenout".
+ *
+ * prsid is an Oid, matching findprs() and every caller (it was declared
+ * int before).
+ *
+ * NOTE(review): the parser state is kept in prs->prs, i.e. inside the
+ * shared cache entry returned by findprs() -- confirm that nested use of
+ * the same parser cannot happen.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* getlexeme stores the token pointer in lex and its length in llen */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* st->cur counts the tokens stored so far; double the list when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the token count and cur the read cursor */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the parse*() functions.
+ *
+ * Returns the next (token type, token text) row as a tuple Datum and
+ * advances the cursor.  After the last token the state is freed and
+ * (Datum)0 is returned to tell the caller that the set is exhausted.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *st;
+
+   st=(PrsStorage*)funcctx->user_fctx;
+   if (  st->cur < st->len ) {
+       Datum result;
+       char* values[2];
+       char    tid[16];
+       HeapTuple    tuple;
+
+       /* the tuple is built from C strings, so the type is formatted as text */
+       values[0]=tid;
+       sprintf(tid,"%d",st->list[st->cur].type);
+       values[1]=st->list[st->cur].lexem;
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       /* the token copy is not needed again once the tuple is built */
+       pfree(values[1]);
+       st->cur++;
+       return result;
+   } else {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+   }
+   return (Datum)0;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose id is argument 0 and return one row per token.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   /* the first call runs the parser and stores all tokens */
+   if ( SRF_IS_FIRSTCALL() ) {
+       text *input = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall( funcctx, PG_GETARG_OID(0), input );
+       PG_FREE_IF_COPY(input, 1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call( funcctx );
+
+   /* prs_process_call() hands back (Datum)0 once every token was returned */
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose name is argument 0 and return one row per token.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   /* the first call resolves the name, runs the parser and stores all tokens */
+   if ( SRF_IS_FIRSTCALL() ) {
+       text *prsname = PG_GETARG_TEXT_P(0);
+       text *input = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall( funcctx, name2id_prs( prsname ), input );
+       PG_FREE_IF_COPY(prsname, 0);
+       PG_FREE_IF_COPY(input, 1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call( funcctx );
+
+   /* prs_process_call() hands back (Datum)0 once every token was returned */
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: parse the text in argument 0 with the current
+ * parser and return one row per token.  If no current parser has been
+ * chosen yet, the one named "default" becomes current.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if ( SRF_IS_FIRSTCALL() ) {
+       text *input = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       prs_setup_firstcall( funcctx, current_parser_id, input );
+       PG_FREE_IF_COPY(input, 0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call( funcctx );
+
+   /* prs_process_call() hands back (Datum)0 once every token was returned */
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * headline(cfg oid, text, query [, options text]) -> text
+ *
+ * Parses the text under configuration `cfg` with hlparsetext(), lets the
+ * headline function of that configuration's parser process the parsed
+ * words, and builds the resulting text with genhl().
+ * The options argument is treated as absent when fewer than four
+ * arguments are passed or when its pointer is NULL.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   /* start with an empty word list that has room for 32 words */
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   /* the parser's headline function works on prs in place; its return value is not used */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /* NOTE(review): presumably startsel/stopsel are always allocated by the headline function -- confirm, pfree() is called unconditionally */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Variant of headline() that takes the configuration by name
+ * (argument 0); the remaining arguments are passed through unchanged.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfgname = PG_GETARG_TEXT_P(0);
+   Datum options;
+   Datum result;
+
+   /* the options argument is optional; a NULL pointer stands for "absent" */
+   if ( PG_NARGS() > 3 )
+       options = PG_GETARG_DATUM(3);
+   else
+       options = PointerGetDatum(NULL);
+
+   result = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum( name2id_cfg( cfgname ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       options
+   );
+
+   PG_FREE_IF_COPY(cfgname, 0);
+   PG_RETURN_DATUM(result);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+
+/*
+ * Variant of headline() that uses the configuration returned by
+ * get_currcfg(); arguments 0 and 1 (and the optional argument 2) are
+ * passed through as headline()'s arguments 1, 2 and 3.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum options;
+   Datum result;
+
+   /* the options argument is optional; a NULL pointer stands for "absent" */
+   if ( PG_NARGS() > 2 )
+       options = PG_GETARG_DATUM(2);
+   else
+       options = PointerGetDatum(NULL);
+
+   result = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum( get_currcfg() ),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       options
+   );
+
+   PG_RETURN_DATUM(result);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* everything needed to call one parser registered in pg_ts_parser */
+typedef struct {
+   /* oid of the parser's pg_ts_parser row */
+   Oid prs_id;
+   /* call info of the prs_start function */
+   FmgrInfo start_info;
+   /* call info of the prs_nexttoken function */
+   FmgrInfo getlexeme_info;
+   /* call info of the prs_end function */
+   FmgrInfo end_info;
+   /* call info of the prs_headline function */
+   FmgrInfo headline_info;
+   /* oid of the prs_lextype function (called by oid, not through FmgrInfo) */
+   Oid lextype;
+   /* parser state: the value returned by the start function */
+   void *prs;
+} WParserInfo;
+
+/* load parser `id` from pg_ts_parser into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached description of parser `id`, loading it on first use */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its id */
+Oid name2id_prs(text *name);
+/* drop every cached parser and the name -> id map */
+void   reset_prs(void);
+
+
+/* description of one token type; arrays of these end with an entry whose lexid is 0 */
+typedef struct {
+   int lexid;
+   char    *alias;
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+/*
+ * prsd_lextype
+ *     Return a palloc'd array describing token types 1..LASTNUM. Slot i-1
+ *     holds id i together with copies of tok_alias[i] and lex_descr[i].
+ *     One extra slot with lexid 0 terminates the array.
+ */
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *list;
+   LexDescr *cur;
+   int id;
+
+   list = (LexDescr*) palloc( sizeof(LexDescr) * (LASTNUM+1) );
+
+   cur = list;
+   for(id=1; id<=LASTNUM; id++, cur++) {
+       cur->lexid = id;
+       cur->alias = pstrdup(tok_alias[id]);
+       cur->descr = pstrdup(lex_descr[id]);
+   }
+
+   /* terminator: only lexid is set, alias and descr are left untouched */
+   cur->lexid = 0;
+
+   PG_RETURN_POINTER(list);
+}
+
+/*
+ * prsd_start
+ *     Pass the character buffer (argument 0) and the int32 in argument 1
+ *     (presumably the buffer length -- confirm against start_parse_str())
+ *     to start_parse_str(). Always returns a NULL pointer.
+ */
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char *buf = (char*) PG_GETARG_POINTER(0);
+
+   start_parse_str( buf, PG_GETARG_INT32(1) );
+   PG_RETURN_POINTER(NULL);
+}
+
+/*
+ * prsd_getlexeme
+ *     Fetch the next token from the lexer (tsearch2_yylex()). The token
+ *     pointer and length are copied from the lexer globals `token` and
+ *     `tokenlen` into the locations given by arguments 1 and 2; the value
+ *     returned by the lexer is the function result. Argument 0 is not used.
+ */
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tokptr = (char**) PG_GETARG_POINTER(1);
+   int *toklenptr = (int*) PG_GETARG_POINTER(2);
+   int lextype;
+
+   /* run the lexer first: it updates token / tokenlen */
+   lextype = tsearch2_yylex();
+
+   *tokptr = token;
+   *toklenptr = tokenlen;
+
+   PG_RETURN_INT32(lextype);
+}
+
+/*
+ * prsd_end
+ *     Finish parsing by calling end_parse(). The argument (the parser
+ *     state, see the disabled line below) is not read.
+ */
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse();
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-type classes used by the headline code below. The arguments are
+ * numeric token type ids (the `type` field of a headline word).
+ * NOTE(review): the symbolic meaning of each number presumably comes from
+ * wordparser/deflex.h -- confirm there before changing any of them.
+ *
+ * LEAVETOKEN, COMPLEXTOKEN and ENDPUNCTOKEN are defined but not referenced
+ * anywhere else in this file.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+/* IDIGNORE is only used as a component of NOENDTOKEN */
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* token types that prsd_headline marks with the `replace` flag */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* token types that do not count as a word when measuring headline length */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* token types a headline should not end on */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* A window of `len` consecutive headline words starting at `words`. */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * checkcondition_HL
+ *     TS_execute() callback: true when some word of the hlCheck window
+ *     passed in checkval refers to the query item val.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *win = (hlCheck*) checkval;
+   int k = 0;
+
+   while ( k < win->len ) {
+       if ( win->words[k].item == val )
+           return true;
+       k++;
+   }
+
+   return false;
+}
+
+
+/*
+ * hlCover
+ *     Find the next "cover": a span [*p, *q] of indexes into prs->words
+ *     whose words satisfy the query (checked with TS_execute()).
+ *
+ *     On entry *p is the first word index to consider. On success the
+ *     function returns true with *p / *q set to the first / last word index
+ *     of the cover; it returns false when no further cover exists.
+ *
+ *     NOTE: *q==0 doubles as the "nothing found" marker, so a match that
+ *     exists only at word index 0 is not reported.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* right edge: the farthest "first occurrence at or after pos" of any query value */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q==0 )
+       return false;
+
+   /* left edge: scanning back from *q, the nearest occurrence of each value; keep the smallest */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* span does not satisfy the query: retry one word further right */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+/*
+ * prsd_headline
+ *     Choose the fragment of the parsed text (prs->words) to show as a
+ *     headline for the query and flag its words.
+ *
+ *     Arguments: 0 - HLPRSTEXT* (modified in place and returned),
+ *                1 - options text or NULL, 2 - QUERYTYPE*.
+ *     Options (case-insensitive keys): MaxWords, MinWords, ShortWord,
+ *     StartSel, StopSel.
+ *
+ *     Every cover found by hlCover() is shortened or extended towards the
+ *     MinWords..MaxWords range and the best candidate is remembered. If
+ *     there is no cover at all, the first MinWords words are used. Words of
+ *     the chosen range get in=1; query words get selected=1, repeated words
+ *     skip=1 and HLIDIGNORE token types replace=1.
+ *
+ *     Errors: elog(ERROR) on inconsistent MinWords/MaxWords/ShortWord.
+ */
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults, may be overridden from opt */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* a better candidate with a good end is already known, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words;i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* cover is too long: shorten it */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this candidate when it is the first one, when it holds more
+        * query words and ends well, or when it ends well while the current
+        * best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   /* no cover found: fall back to the first min_words words of the text */
+   if ( bestlen<0 ) {
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* default highlight markers when the options did not supply any */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+           if ( !pos[k] ) continue;
+           lenct = *(uint16*)(pos[k]);
+           ct = (WordEntryPos*)(pos[k]+1);
+           for(l=0; l
+               for(p=0; p
+                   dist = abs( post[l].pos - ct[p].pos );
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw; 
+                       if ( !dist ) dist=MAXENTRYPOS;  
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res; 
+}
+
+/*
+ * calc_rank_or
+ *     Rank tsvector t against query q by combining the weight of every
+ *     position of every query value found in t:
+ *         res = 1 - (1 - res) * (1 - weight)
+ *     w is the weight table read by the wpos() macro. Entries without
+ *     position data are treated as having the single POSNULL position.
+ *     Returns -1.0 when no query value occurs in t.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* store the element count in the first slot of the shared POSNULL array */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * calc_rank
+ *     Rank tsvector t against query q with weight table w.
+ *
+ *     A query whose first item is an '&' operator is ranked with
+ *     calc_rank_and(), anything else with calc_rank_or(). A negative result
+ *     from either is replaced by 1e-20. The result is then normalized:
+ *         method 0 - unchanged
+ *         method 1 - divided by log(cnt_length(t))
+ *         method 2 - divided by cnt_length(t)
+ *     Any other method raises elog(ERROR).
+ *
+ *     NOTE(review): the divisor is 0 when cnt_length(t) is 1 (method 1) or
+ *     0 (method 2); nothing here guards against that.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *root = GETQUERY(q);
+   float score;
+
+   /* nothing to rank */
+   if ( t->size == 0 || q->size == 0 )
+       return 0.0;
+
+   if ( root->type != VAL && root->val == (int4) '&' )
+       score = calc_rank_and(w,t,q);
+   else
+       score = calc_rank_or(w,t,q);
+
+   if ( score < 0 )
+       score = 1e-20;
+
+   if ( method == 1 )
+       score /= log((float)cnt_length(t));
+   else if ( method == 2 )
+       score /= (float)cnt_length(t);
+   else if ( method != 0 )
+       elog(ERROR,"Unknown normalization method: %d",method);
+
+   return score;
+}
+
+/*
+ * rank
+ *     rank(weights float4[], tsvector, query [, method int4])
+ *
+ *     Like rank_def() but with caller-supplied weights. The array must be
+ *     one-dimensional with at least lengthof(weights) elements; a negative
+ *     element selects the built-in default for that slot and a value above
+ *     1.0 is rejected. The normalization method comes from argument 3 when
+ *     four arguments are passed, otherwise DEF_NORM_METHOD is used.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 ) 
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+        elog(ERROR,"Array of weight is too short");
+
+   /* copy the weights, substituting defaults for negative entries */
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 ) 
+           elog(ERROR,"Weight out of range");
+   } 
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method); 
+       
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank_def
+ *     rank(tsvector, query [, method int4]) using the built-in weights.
+ *     The normalization method is argument 2 when exactly three arguments
+ *     are passed, otherwise DEF_NORM_METHOD.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *qry = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int norm = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   float score;
+
+   score = calc_rank(weights, vec, qry, norm);
+
+   /* release detoasted copies, if any were made */
+   PG_FREE_IF_COPY(vec, 0);
+   PG_FREE_IF_COPY(qry, 1);
+   PG_RETURN_FLOAT4(score);
+}
+
+
+/* One occurrence of a query item in the document, at word position pos. */
+typedef struct {
+   ITEM    *item;
+   int32   pos;
+} DocRepresentation;
+
+/*
+ * compareDocR
+ *     qsort() comparator ordering DocRepresentation entries by ascending
+ *     position. Equal positions compare as 0: qsort requires a consistent
+ *     ordering, and returning 1 for both (a,b) and (b,a) breaks it.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   int32 pa = ((const DocRepresentation *) a)->pos;
+   int32 pb = ((const DocRepresentation *) b)->pos;
+
+   if ( pa == pb )
+       return 0;
+   return ( pa > pb ) ? 1 : -1;
+}
+
+
+/* A window of `len` consecutive DocRepresentation entries starting at `doc`. */
+typedef struct {
+   DocRepresentation *doc;
+   int len;
+}  ChkDocR;
+
+/*
+ * checkcondition_DR
+ *     TS_execute() callback: true when the query item val occurs among the
+ *     entries of the ChkDocR window passed in checkval.
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *win = (ChkDocR*) checkval;
+   int k;
+
+   for(k=0; k < win->len; k++) {
+       if ( win->doc[k].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Cover
+ *     Find the next cover in the position-sorted array doc[0..len-1]: a
+ *     run of entries, starting no earlier than index *pos, that satisfies
+ *     the query (checked with TS_execute()).
+ *
+ *     In/out: *pos - index in doc where the search starts; advanced so the
+ *                    next call continues after the start of this cover.
+ *             *q   - on entry the end position of the previous cover (used
+ *                    to skip an already reported one); on success the word
+ *                    position where the cover ends.
+ *     Out:    *p   - word position where the cover starts.
+ *     Returns true when a cover was found, false otherwise.
+ *
+ *     NOTE: *q==0 is the "nothing found" marker.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* right edge: the farthest first occurrence of any query value */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               } 
+               break;
+           } 
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   } 
+
+   /* left edge: scanning back from lastpos, the nearest occurrence of each value */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+ 
+   if ( *p<=*q ) {
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { 
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ 
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q); 
+   }
+ 
+   return false;
+}
+
+/*
+ * get_docrep
+ *     Build the "document representation" of txt with respect to query:
+ *     one DocRepresentation entry per position of every query value found
+ *     in txt, sorted by position. Entries without position data contribute
+ *     the single POSNULL position.
+ *
+ *     *doclen receives the number of entries. Returns a palloc'd array, or
+ *     NULL (with *doclen == 0) when no query value occurs in txt.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   /* store the element count in the first slot of the shared POSNULL array */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until the new positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+   
+   if ( cur>0 ) {
+       if ( cur>1 ) 
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+   
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd
+ *     rank_cd(K int4, tsvector, query [, method int4])
+ *
+ *     Cover-based ranking: each cover reported by Cover() adds 1.0 when its
+ *     width (q-p+1) does not exceed K, otherwise K/width. A K of zero or
+ *     less is replaced by 4. The sum is normalized like in calc_rank():
+ *     method 0 - unchanged, 1 - divided by log(cnt_length), 2 - divided by
+ *     cnt_length; other values raise elog(ERROR). Returns 0.0 when the
+ *     document contains none of the query values.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *qry = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int norm = DEF_NORM_METHOD;
+   DocRepresentation *docrep;
+   float score = 0.0;
+   int lo = 0, hi = 0;
+   int ndoc;
+   int scanpos = 0;
+
+   docrep = get_docrep(vec, qry, &ndoc);
+   if ( docrep == NULL ) {
+       PG_FREE_IF_COPY(vec, 1);
+       PG_FREE_IF_COPY(qry, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   if ( K <= 0 )
+       K = 4;
+
+   while( Cover(docrep, ndoc, qry, &scanpos, &lo, &hi) ) {
+       int width = hi - lo + 1;
+
+       score += ( width > K ) ? ((float)K)/((float)width) : 1.0;
+   }
+
+   if ( PG_NARGS() == 4 )
+       norm = PG_GETARG_INT32(3);
+
+   if ( norm == 1 )
+       score /= log((float)cnt_length(vec));
+   else if ( norm == 2 )
+       score /= (float)cnt_length(vec);
+   else if ( norm != 0 )
+       elog(ERROR,"Unknown normalization method: %d",norm);
+
+   pfree(docrep);
+   PG_FREE_IF_COPY(vec, 1);
+   PG_FREE_IF_COPY(qry, 2);
+
+   PG_RETURN_FLOAT4(score);
+}
+
+
+/*
+ * rank_cd_def
+ *     rank_cd(tsvector, query [, method int4]): calls rank_cd() with K = -1
+ *     (which rank_cd replaces by its default) and with the normalization
+ *     method from argument 2 when exactly three arguments are passed,
+ *     otherwise DEF_NORM_METHOD.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum norm = Int32GetDatum(DEF_NORM_METHOD);
+   Datum result;
+
+   if ( PG_NARGS() == 3 )
+       norm = PG_GETARG_DATUM(2);
+
+   result = DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       norm
+   );
+
+   PG_RETURN_DATUM(result);
+}
+
+/**************debug*************/
+
+/*
+ * One word occurrence of the document, used by get_covers(): w/len point at
+ * the (not NUL-terminated) lexeme, pos is its word position, start/finish
+ * hold the number of the cover that begins/ends at this word (0 = none).
+ */
+typedef struct {
+   char    *w;
+   int2    len;
+   int2    pos;
+   int2    start;
+   int2    finish;
+} DocWord;
+
+/*
+ * compareDocWord
+ *     qsort() comparator ordering DocWord entries by ascending position.
+ *     Equal positions compare as 0: qsort requires a consistent ordering,
+ *     and returning 1 for both (a,b) and (b,a) breaks it.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   int2 pa = ((const DocWord *) a)->pos;
+   int2 pb = ((const DocWord *) b)->pos;
+
+   if ( pa == pb )
+       return 0;
+   return ( pa > pb ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers
+ *     Debug helper: get_covers(tsvector, query) returns the document's
+ *     words in position order as text, with every cover found by Cover()
+ *     marked as "{N " before its first word and "}N " after its last word,
+ *     N being the cover number (starting at 1).
+ *
+ *     Returns an empty text when the document contains none of the query
+ *     values. Raises elog(ERROR) when a word has no position data.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences of the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+        dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size (word + space) */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* bound is tested first so dw[dlen] is never read */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/* Binary-tree form of a query: valnode points into the plain ITEM array. */
+typedef struct NODE
+{
+   struct NODE *left;
+   struct NODE *right;
+   ITEM       *valnode;
+}  NODE;
+
+/*
+ * Build the tree form of the query whose plain (array) form starts at `in`.
+ * For an operator the right operand is the next array item; the left
+ * operand, absent for '!', is in->left items away.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *n = (NODE *) palloc(sizeof(NODE));
+
+   n->valnode = in;
+   n->left = NULL;
+   n->right = NULL;
+
+   /* anything that is not an operator is a leaf */
+   if (in->type != OPR)
+       return n;
+
+   n->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       n->left = maketree(in + in->left);
+
+   return n;
+}
+
+/* Growable output buffer used while flattening a tree: cur of len slots used. */
+typedef struct
+{
+   ITEM       *ptr;
+   int4        len;
+   int4        cur;
+}  PLAINTREE;
+
+/*
+ * Append `node` and its subtree to the plain array in `state`, freeing the
+ * tree nodes on the way. An operator is followed by its right operand; its
+ * `left` field is set to the distance to the left operand (1 for '!').
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   ITEM       *src = node->valnode;
+   int4        self;
+
+   /* double the buffer when it is full */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+
+   self = state->cur;
+   memcpy((void *) &(state->ptr[self]), (void *) src, sizeof(ITEM));
+   state->cur++;
+
+   if (src->type != VAL)
+   {
+       if (src->val == (int4) '!')
+       {
+           state->ptr[self].left = 1;
+           plainnode(state, node->right);
+       }
+       else
+       {
+           plainnode(state, node->right);
+           /* the buffer may have moved: index it again instead of keeping a pointer */
+           state->ptr[self].left = state->cur - self;
+           plainnode(state, node->left);
+       }
+   }
+
+   pfree(node);
+}
+
+/*
+ * Convert the tree `root` back into plain (array) form.
+ * Returns a palloc'd ITEM array and stores its length in *len. When root is
+ * NULL, or its item is neither VAL nor OPR, returns NULL with *len == 0.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   pl;
+
+   pl.ptr = NULL;
+   pl.cur = 0;
+   pl.len = 16;
+
+   if (root != NULL &&
+       (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
+       plainnode(&pl, root);
+   }
+
+   *len = pl.cur;
+   return pl.ptr;
+}
+
+/* Free `node` and everything below it; a NULL node is ignored. */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive calls handle NULL children themselves */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Remove every '!' operator from the tree.
+ * This is useful for debugging, and the resulting form is also what an
+ * index search uses. A '!' subtree is treated as "always true":
+ *   - '!' itself is dropped (NULL is returned);
+ *   - '|' is dropped entirely when either operand was dropped;
+ *   - '&' is replaced by its surviving operand, or dropped when none is left.
+ * Dropped nodes are freed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   NODE       *l,
+              *r;
+
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   if (node->valnode->val == (int4) '|')
+   {
+       /* the right operand is only cleaned when the left one survived */
+       node->left = clean_NOT_intree(node->left);
+       if (node->left != NULL)
+           node->right = clean_NOT_intree(node->right);
+
+       if (node->left == NULL || node->right == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+       return node;
+   }
+
+   /* operator & */
+   node->left = clean_NOT_intree(node->left);
+   node->right = clean_NOT_intree(node->right);
+   l = node->left;
+   r = node->right;
+
+   if (l != NULL && r != NULL)
+       return node;
+
+   /* at most one operand is left: it (or NULL) replaces this node */
+   pfree(node);
+   return (l != NULL) ? l : r;
+}
+
+/*
+ * Return a new plain query equal to the one at ptr with all '!' operators
+ * removed (see clean_NOT_intree); its length is stored in *len.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *cleaned = clean_NOT_intree(maketree(ptr));
+
+   return plaintree(cleaned, len);
+}
+
+/* Constant value of a removed subtree, reported through *result. */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Remove from the tree the values that are always present in a text
+ * (stopwords, represented as VALTRUE items) and simplify the operators
+ * above them.
+ *
+ * Returns the simplified subtree, or NULL when the whole subtree collapsed
+ * to a constant; in that case *result is set to V_TRUE or V_FALSE. *result
+ * is left untouched when a subtree is returned. Removed nodes are freed.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lres = V_UNKNOWN,
+               rres = V_UNKNOWN;
+   char        absorbing,
+               neutral;
+   NODE       *keep;
+
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rres);
+       if (node->right == NULL)
+       {
+           /* negation of a constant is the opposite constant */
+           *result = (rres == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+       return node;
+   }
+
+   /*
+    * Binary operator. For '|' a TRUE operand decides the result and a
+    * FALSE operand can be dropped; for any other operator (i.e. '&') the
+    * roles of TRUE and FALSE are swapped.
+    */
+   if (node->valnode->val == (int4) '|')
+   {
+       absorbing = V_TRUE;
+       neutral = V_FALSE;
+   }
+   else
+   {
+       absorbing = V_FALSE;
+       neutral = V_TRUE;
+   }
+
+   node->left = clean_fakeval_intree(node->left, &lres);
+   node->right = clean_fakeval_intree(node->right, &rres);
+
+   if (lres == absorbing || rres == absorbing)
+   {
+       freetree(node);
+       *result = absorbing;
+       return NULL;
+   }
+
+   if (lres == neutral && rres == neutral)
+   {
+       freetree(node);
+       *result = neutral;
+       return NULL;
+   }
+
+   /* one neutral operand: the other operand replaces this node */
+   keep = node;
+   if (lres == neutral)
+   {
+       keep = node->right;
+       pfree(node);
+   }
+   else if (rres == neutral)
+   {
+       keep = node->left;
+       pfree(node);
+   }
+   return keep;
+}
+
+/*
+ * Return a new plain query equal to the one at ptr with stopword
+ * placeholders removed (see clean_fakeval_intree); its length is stored in
+ * *len. When the whole query collapses to a constant, a NOTICE is emitted
+ * and NULL is returned with *len == 0.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *cleaned;
+
+   cleaned = clean_fakeval_intree(maketree(ptr), &verdict);
+
+   if (verdict == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/*
+ * Query rewriting entry points (rewrite.c). Both take a query in plain
+ * (ITEM array) form, return a newly built plain array (or NULL) and store
+ * its length in *len.
+ */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/* qsort()/bsearch() comparator: orders SNMapEntry items by key using strcmp(). */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *ea = (const SNMapEntry*) a;
+   const SNMapEntry *eb = (const SNMapEntry*) b;
+
+   return strcmp( ea->key, eb->key );
+}
+
+/*
+ * addSNMap
+ *     Add (key -> value) to the map. The key is copied with strdup() and
+ *     the entry array lives in malloc memory; it starts with 16 slots and
+ *     doubles when full. The array is re-sorted after each insertion so
+ *     that findSNMap() can use bsearch(). Raises elog(ERROR) when memory
+ *     cannot be obtained.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if ( map->len >= map->reallen ) {
+       int newcap = 16;
+       SNMapEntry *grown;
+
+       if ( map->reallen )
+           newcap = 2*map->reallen;
+
+       grown = (SNMapEntry*) realloc(map->list, sizeof(SNMapEntry) * newcap);
+       if ( grown == NULL )
+           elog(ERROR, "No memory");
+       map->reallen = newcap;
+       map->list = grown;
+   }
+
+   slot = &map->list[ map->len ];
+   slot->key = strdup(key);
+   if ( slot->key == NULL )
+       elog(ERROR, "No memory");
+   slot->value = value;
+   map->len++;
+
+   if ( map->len > 1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/* addSNMap() for a text key: converts it with text2char() and frees the temporary string. */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey;
+
+   ckey = text2char( key );
+   addSNMap(map, ckey, value);
+   pfree(ckey);
+}
+
+/*
+ * findSNMap
+ *     Look up key with a binary search. Returns the stored value, or 0
+ *     when the map is empty or the key is not present.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if ( map->len==0 || !map->list )
+       return 0;   
+
+   probe.key = key;
+   probe.value = 0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+
+   if ( hit == NULL )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * findSNMap_t
+ *     findSNMap() for a text key: converts it with text2char(), performs
+ *     the lookup and frees the temporary string. Returns 0 when not found.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* kept as Oid: a signed int cannot hold every Oid value */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/* Release every key and the entry array, then reset the map to all zeroes. */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       int k;
+
+       for(k=0; k<map->len; k++) {
+           if ( map->list[k].key )
+               free(map->list[k].key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One map entry: a C-string key and the Oid stored under it. */
+typedef struct {
+   char    *key;
+   Oid value;
+} SNMapEntry;
+
+/*
+ * String-to-Oid map: len entries used out of reallen allocated slots in
+ * list. A zero-filled SNMap is an empty map.
+ */
+typedef struct {
+   int len;
+   int reallen;
+   SNMapEntry  *list;
+} SNMap;
+
+/*
+ * add*: insert a key/value pair (the _t variant takes a text key);
+ * find*: return the value stored under key, or 0 when it is absent;
+ * freeSNMap: release all entries and reset the map.
+ */
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * SN_create_env
+ *     Allocate a zero-initialised stemmer environment with S_size string
+ *     slots, I_size integer slots and B_size symbol slots.
+ *
+ *     Returns the new environment, or NULL when one of the calloc() calls
+ *     fails; everything allocated up to that point is released. A size
+ *     field is only set after its array has been allocated, so
+ *     SN_close_env() can safely tear down a partially built environment.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    SN_close_env(z);
+    return NULL;
+}
+
+/*
+ * SN_close_env
+ *     Release an environment created by SN_create_env(): the string slots
+ *     and their array, the integer and symbol arrays, the current string
+ *     and finally the environment itself. Arrays are only freed when the
+ *     matching size field is non-zero.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    if (z->S_size)
+    {
+        int k;
+
+        for (k = 0; k < z->S_size; k++)
+            lose_s(z->S[k]);
+        free(z->S);
+    }
+
+    if (z->I_size)
+        free(z->I);
+    if (z->B_size)
+        free(z->B);
+    if (z->p)
+        lose_s(z->p);
+
+    free(z);
+}
+
+/*
+ * SN_set_current
+ *     Load the `size` symbols at `s` into the environment: the span from 0
+ *     to z->l is replaced via replace_s() and z->c is reset to 0.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/*
+ * Stemmer environment.
+ * p is the string being worked on; S, I and B are arrays of S_size strings,
+ * I_size ints and B_size symbols.
+ * NOTE(review): c, a, l, lb, bra and ket look like cursor / limit / slice
+ * indexes into p -- confirm against the Snowball runtime (utilities.c).
+ */
+struct SN_env {
+    symbol * p;
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;
+    symbol * * S;
+    int * I;
+    symbol * B;
+};
+
+/* create / destroy an environment */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+/* load the `size` symbols at `s` as the environment's current string */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+/* Forward declarations.  english_stem() is the public entry point; the
+   static r_* routines are the individual rules it is assembled from. */
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+/* Constructor / destructor for the environment this stemmer needs. */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+/* Prefix table searched forwards by r_mark_regions (find_among): the single
+   entry "gener".  Each row is { length, string, substring_i, result,
+   function }, following struct among in header.h. */
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+/* Suffix table searched backwards by r_Step_1a.  The result column selects
+   the switch case there: 1 = "sses", 2 = "ied"/"ies", 3 = "s"; the -1 rows
+   ("ss", "us") match but have no case, so the word is left unchanged. */
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+/* Ending table for the second lookup in r_Step_1b (after "ed"/"ing" has
+   been removed).  Results: 1 = "bl"/"at"/"iz" (an 'e' is appended),
+   2 = doubled consonant (last letter dropped), 3 = row 0, the empty string
+   (presumably the fallback when no longer ending matches -- confirm against
+   find_among_b). */
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+/* Suffix table for the first lookup in r_Step_1b.  Results: 1 = "eed" and
+   "eedly" (rewritten to "ee" inside R1), 2 = "ed", "ing", "edly", "ingly"
+   (deleted when a g_v character precedes them). */
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+/* Suffix table searched backwards by r_Step_2.  The result column (1..16)
+   is the switch case in r_Step_2 that supplies the replacement; several
+   suffixes share a case (e.g. "ational", "ation" and "ator" all use 7). */
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+/* Suffix table searched backwards by r_Step_3.  Results: 1 = "tional",
+   2 = "ational", 3 = "alize", 4 = "icate"/"iciti"/"ical",
+   5 = "ful"/"ness", 6 = "ative"; each is the switch case in r_Step_3. */
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+/* Suffix table searched backwards by r_Step_4.  Every suffix has result 1
+   (plain deletion inside R2) except "ion", which has result 2 (deleted only
+   when preceded by 's' or 't'). */
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+/* Final-letter table searched backwards by r_Step_5: result 1 = 'e',
+   result 2 = 'l'. */
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+/* Whole-word list searched backwards by r_exception2.  All results are -1:
+   only the fact of a match is used, and english_stem() then skips
+   Step_1b .. Step_5 for the word. */
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+/* Whole-word list searched forwards by r_exception1.  Results 1..11 select
+   the replacement in r_exception1's switch (e.g. "skis" -> case 1,
+   "dying" -> case 3); rows with result -1 match but are left unchanged. */
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+/* Character groupings handed to in_grouping/out_grouping together with a
+   min/max character code.  NOTE(review): these look like bitmaps in which
+   bit i of byte j stands for character (min + 8*j + i); read that way g_v
+   (min 97) is a e i o u y -- confirm against in_grouping in utilities.c. */
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+/* Used with min 89 in r_shortv; under the same assumption: Y a e i o u w x y. */
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+/* Used with min 99 in r_Step_2; under the same assumption: c d e g h k m n r t. */
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+/* Literal strings referenced by the rule routines below, as arguments to
+   eq_s / eq_s_b (matching) and slice_from_s / insert_s (rewriting).  The
+   generator emits one array per use, hence the duplicates. */
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+/*
+ * Prelude: mark certain 'y' letters by rewriting them to 'Y'.
+ *
+ * Clears the Y_found flag (z->B[0]).  Then (1) if the text at the cursor is
+ * 'y' followed by a g_v character, that 'y' becomes 'Y'; (2) scanning
+ * forwards repeatedly, every 'y' that directly follows a g_v character
+ * becomes 'Y'.  Either rewrite sets Y_found.  The cursor is restored to its
+ * entry value and the routine always returns 1.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c; /* match found: step back to where it began */
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2; /* end of word: stop repeating */
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1: /* emitted by the generator; no goto in this routine targets it */
+        z->c = c;
+    }
+    return 1;
+}
+/*
+ * Compute the region marks p1 (z->I[0]) and p2 (z->I[1]).
+ *
+ * Both marks start at the end of the word (z->l).  p1 is then set to the
+ * cursor position reached either by matching the prefix table a_0
+ * ("gener") or, failing that, by scanning forwards past a g_v character and
+ * then past a non-g_v character.  p2 is set after one more such pair of
+ * scans.  If the end of the word is hit during any scan, the marks not yet
+ * assigned keep their end-of-word default.  The cursor is restored and the
+ * routine always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0; /* ran out of input: give up, keep defaults */
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+/*
+ * Backward-mode test used by Step_1b and Step_5.  Reading backwards from
+ * the cursor it succeeds (returns 1) when the text ends in either
+ *   (a) a character outside g_v_WXY, preceded by a g_v character, preceded
+ *       by a character outside g_v; or
+ *   (b) a character outside g_v, preceded by a g_v character that sits at
+ *       the backward limit z->lb.
+ * Returns 0 otherwise.  The cursor is not restored here; the callers in
+ * this file save and restore it around the call.
+ */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m; /* first alternative failed: retry from the saved position */
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* R1 test: 1 when the cursor lies at or beyond mark p1 (z->I[0]), else 0. */
+static int r_R1(struct SN_env * z) {
+    const int p1 = z->I[0];
+
+    return (z->c >= p1) ? 1 : 0;
+}
+
+/* R2 test: 1 when the cursor lies at or beyond mark p2 (z->I[1]), else 0. */
+static int r_R2(struct SN_env * z) {
+    const int p2 = z->I[1];
+
+    return (z->c >= p2) ? 1 : 0;
+}
+/*
+ * Step 1a (backward mode): handle the plural-like endings listed in a_1.
+ *
+ *   "sses"        -> "ss"
+ *   "ied", "ies"  -> "ie" when exactly one character precedes the suffix,
+ *                    otherwise "i"
+ *   "s"           -> deleted, provided a g_v character occurs somewhere
+ *                    before the character that precedes the 's'
+ *   "ss", "us"    -> matched but left unchanged
+ *
+ * Returns 0 when no suffix matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+/*
+ * Step 1b (backward mode): handle the endings listed in a_3.
+ *
+ *   "eed", "eedly"               -> "ee", only inside R1
+ *   "ed", "ing", "edly", "ingly" -> deleted, only if a g_v character
+ *                                   precedes the suffix
+ *
+ * After a deletion the new ending is looked up in a_2 and patched:
+ *   result 1 ("bl", "at", "iz")  -> an 'e' is inserted at the cursor
+ *   result 2 (doubled consonant) -> the last character is removed
+ *   result 3 (empty-string row)  -> an 'e' is inserted if the cursor is at
+ *                                   mark p1 and r_shortv succeeds
+ *
+ * Returns 0 when nothing matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+/*
+ * Step 1c (backward mode): replace a final 'y' or 'Y' by 'i' when the
+ * character before it is outside g_v and that character is not the first
+ * one of the word (the cursor must not be at the backward limit after
+ * stepping over it).  Returns 1 if the replacement was made, 0 otherwise.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+/*
+ * Step 2 (backward mode): rewrite the longest suffix from a_4, provided it
+ * lies inside R1.  Replacements by case:
+ *    1 tional -> tion      2 enci -> ence          3 anci -> ance
+ *    4 abli -> able        5 entli -> ent          6 izer, ization -> ize
+ *    7 ational, ation, ator -> ate                 8 alism, aliti, alli -> al
+ *    9 fulness -> ful     10 ousli, ousness -> ous
+ *   11 iveness, iviti -> ive                      12 biliti, bli -> ble
+ *   13 ogi -> og (only if preceded by 'l')        14 fulli -> ful
+ *   15 lessli -> less
+ *   16 li -> deleted (only if preceded by a g_valid_LI character)
+ * Returns 0 when no suffix matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+/*
+ * Step 3 (backward mode): rewrite the longest suffix from a_5, provided it
+ * lies inside R1.  Replacements by case:
+ *   1 tional -> tion       2 ational -> ate      3 alize -> al
+ *   4 icate, iciti, ical -> ic                   5 ful, ness -> deleted
+ *   6 ative -> deleted, only if also inside R2
+ * Returns 0 when no suffix matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+/*
+ * Step 4 (backward mode): delete the longest suffix from a_6, provided it
+ * lies inside R2.  "ion" (case 2) is deleted only when the character before
+ * it is 's' or 't'; every other suffix (case 1) is deleted outright.
+ * Returns 0 when no suffix matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+/*
+ * Step 5 (backward mode): tidy a final 'e' or 'l' (table a_7).
+ *
+ *   'e' -> deleted if it lies in R2, or if it lies in R1 and r_shortv does
+ *          not succeed on the text before it
+ *   'l' -> deleted if it lies in R2 and is preceded by another 'l'
+ *
+ * Returns 0 when neither letter matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+/*
+ * Backward-mode exception test: returns 1 when an entry of a_8 matches at
+ * the end of the word and the match reaches the backward limit (i.e. the
+ * entry covers everything back to z->lb), 0 otherwise.  Nothing is
+ * rewritten; english_stem() skips Step_1b .. Step_5 when this succeeds.
+ */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (!(find_among_b(z, a_8, 8))) return 0; /* substring, line 147 */
+    z->bra = z->c; /* ], line 147 */
+    if (z->c > z->lb) return 0; /* atlimit, line 147 */
+    return 1;
+}
+/*
+ * Forward-mode exception list (table a_9).  Succeeds only when an entry
+ * matches at the cursor and the match ends exactly at the end of the word.
+ * Replacements by case:
+ *   1 skis -> ski     2 skies -> sky     3 dying -> die    4 lying -> lie
+ *   5 tying -> tie    6 idly -> idl      7 gently -> gentl 8 ugly -> ugli
+ *   9 early -> earli 10 only -> onli    11 singly -> singl
+ * Entries with result -1 (andes, atlas, bias, cosmos, howe, news, sky)
+ * match but are left unchanged.  Returns 1 on a match, 0 otherwise.
+ */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+/*
+ * Postlude: undo the prelude's marking.  If Y_found (z->B[0]) is unset the
+ * routine returns 0 immediately.  Otherwise it scans forwards from the
+ * cursor, rewriting every 'Y' to 'y' until the end of the word is reached,
+ * restores the cursor to where the last unsuccessful scan began, and
+ * returns 1.
+ */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c; /* match found: step back to where it began */
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0; /* end of word: stop repeating */
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+/*
+ * Stem the word currently loaded in z, in place.
+ *
+ * Order of work:
+ *   1. r_exception1: if the whole word is in the exception list, apply its
+ *      fixed result and finish (returns 1).
+ *   2. Otherwise require at least 3 symbols from the cursor; if fewer are
+ *      available, return 0 and leave the word untouched.
+ *   3. r_prelude and r_mark_regions, each run as a "do" (the cursor is
+ *      restored afterwards and failure is ignored).
+ *   4. Switch to backward mode (lb = cursor, cursor = end of word) and run
+ *      Step_1a; then, unless r_exception2 matches, Step_1b, Step_1c,
+ *      Step_2, Step_3, Step_4 and Step_5, each as a "do".
+ *   5. Move the cursor back to lb and run r_postlude.
+ *
+ * Returns 1 except in the short-word case of item 2.
+ */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb; /* leave backward mode: cursor returns to the saved start */
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/* Build the environment the English stemmer needs: no string variables,
+   two integers (the p1/p2 marks in I[0], I[1]) and one boolean (Y_found
+   in B[0]). */
+extern struct SN_env * english_create_env(void)
+{
+    const int n_strings = 0;
+    const int n_integers = 2;
+    const int n_booleans = 1;
+
+    return SN_create_env(n_strings, n_integers, n_booleans);
+}
+
+/* Dispose of an environment made by english_create_env(); simply forwards
+   to the generic SN_close_env(). */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+/* Create / destroy the environment used by the English stemmer. */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+/* Stem the word currently loaded in z, in place. */
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h> /* INT_MAX, INT_MIN */
+
+#include "api.h"
+
+/* Integer extremes, taken from <limits.h>. */
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Size in bytes of the two-int header that sits in front of a symbol
+   string (see SIZE and CAPACITY below).  Parenthesized so that it expands
+   safely inside larger expressions. */
+#define HEAD (2*sizeof(int))
+
+/* Accessors for the int slots stored immediately before string p:
+   slot [-1] is read/written as SIZE, slot [-2] is read as CAPACITY. */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) ((int *)(p))[-1] = (n)
+#define CAPACITY(p)    ((int *)(p))[-2]
+/* One row of a lookup table searched by find_among / find_among_b. */
+struct among
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index, within the same table, of the longest other entry that is a substring of this one; -1 if none */
+    int result;     /* result of the lookup; callers switch on it, and treat 0 as "no match" */
+    int (* function)(struct SN_env *); /* optional routine attached to the entry; 0 in every table visible here */
+};
+/* String allocation: create_s makes a new string, lose_s releases one. */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+/* Grouping tests at the cursor; s is a grouping table and min/max bound the
+   character codes it covers.  The _b variants are the ones the stemmers
+   call in backward mode.  Callers treat a non-zero return as success. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+/* Range tests; presumably the same idea with a plain min..max interval --
+   not exercised by the stemmer code visible here, confirm in utilities.c. */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+/* Literal / string-variable comparison at the cursor (forward and _b). */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+/* Table lookup over v_size rows of v; callers use the return value as the
+   matched row's result code and treat 0 as failure. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+/* String editing.  slice_from_s / slice_del act on the slice delimited by
+   z->bra and z->ket (callers set both before invoking them). */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+/* Insertion between positions bra and ket. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+/* NOTE(review): presumably copy the slice / the whole string into p -- not
+   used by the code visible here, confirm in utilities.c. */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+/* Debugging aid; not called by the code visible here. */
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int russian_stem(struct SN_env * z);
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Suffix strings and lookup table searched by r_perfective_gerund() through
+   find_among_b(z, a_0, 9).  Each a_0 row holds: string length, string,
+   index of another row to try next if this one does not apply (-1 = none),
+   result code handed back to the caller, and an optional condition function
+   (0 = none).
+   NOTE(review): the byte values look like KOI8-R encoded Cyrillic - confirm. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Suffix strings and lookup table searched by r_adjective() through
+   find_among_b(z, a_1, 26).  Every row reports result code 1 and has no
+   fallback row and no condition function. */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Suffix strings and lookup table searched by r_adjectival() through
+   find_among_b(z, a_2, 8), after an adjective ending has been removed.
+   Result code 1 requires a further preceding-letter check in the caller;
+   result code 2 is deleted unconditionally. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* Suffix strings and lookup table searched by r_reflexive() through
+   find_among_b(z, a_3, 2); both rows report result code 1. */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Suffix strings and lookup table searched by r_verb() through
+   find_among_b(z, a_4, 46).  Result code 1 requires a further
+   preceding-letter check in the caller; result code 2 is deleted
+   unconditionally.  The third column of each row names the row to fall
+   back to (-1 = none). */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Suffix strings and lookup table searched by r_noun() through
+   find_among_b(z, a_5, 36).  Every row reports result code 1; the third
+   column names the row to fall back to (-1 = none). */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* Suffix strings and lookup table searched by r_derivational() through
+   find_among_b(z, a_6, 2); both rows report result code 1. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Suffix strings and lookup table searched by r_tidy_up() through
+   find_among_b(z, a_7, 4).  The result codes 1, 2 and 3 select the three
+   branches of the switch in r_tidy_up(). */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Bitmap handed to in_grouping()/out_grouping() by r_mark_regions() for the
+   code-point range 192..220; bit (ch - 192) is set for members of the group
+   (the name suggests the vowels - confirm against the Snowball source). */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* One-symbol literals compared with eq_s_b() by the routines below:
+   s_0/s_1 in r_perfective_gerund(), s_2/s_3 in r_adjectival(), s_4/s_5 in
+   r_verb(), s_6..s_8 in r_tidy_up() and s_9 in russian_stem(). */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the region marks used by the suffix routines.
+   z->I[0] (pV) and z->I[1] (p2) both start out at the end of the word.  The
+   cursor is then walked past, in turn, a g_v member, a non-member, a member
+   and a non-member; pV is recorded after the first step and p2 after the
+   fourth.  If the word runs out first, the marks set so far are kept.  The
+   cursor is restored before returning; the result is always 1. */
+static int r_mark_regions(struct SN_env * z) {
+    int saved_cursor = z->c; /* do, line 100 */
+    int step;
+
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+
+    for (step = 0; step < 4; step++) { /* gopast, lines 101-102 */
+        for (;;) {
+            /* even steps look for a g_v member, odd steps for a non-member */
+            int found = (step & 1) ? out_grouping(z, g_v, 192, 220)
+                                   : in_grouping(z, g_v, 192, 220);
+            if (found) break;
+            if (z->c >= z->l) goto finished;
+            z->c++;
+        }
+        if (step == 0) z->I[0] = z->c; /* setmark pV, line 101 */
+    }
+    z->I[1] = z->c; /* setmark p2, line 102 */
+
+finished:
+    z->c = saved_cursor;
+    return 1;
+}
+
+/* Return 1 when the cursor lies at or after the p2 mark (z->I[1]), else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/* Remove a perfective-gerund ending found in table a_0 just before the
+   cursor.  Endings with result code 1 are removed only when preceded by the
+   letter s_0 or s_1; endings with result code 2 are removed unconditionally.
+   Returns 1 when something was deleted, 0 when nothing applied. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 111 */
+    among_var = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 111 */
+
+    if (among_var == 1) {
+        int mark = z->l - z->c; /* or, line 115 */
+        if (!eq_s_b(z, 1, s_0)) {
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z); /* delete, line 115 */
+    } else if (among_var == 2) {
+        slice_del(z); /* delete, line 122 */
+    }
+    return 1;
+}
+
+/* Remove an adjective ending found in table a_1 just before the cursor.
+   Returns 1 when an ending matched (and was deleted), 0 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 127 */
+    among_var = find_among_b(z, a_1, 26); /* substring, line 127 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 127 */
+
+    if (among_var == 1)
+        slice_del(z); /* delete, line 136 */
+    return 1;
+}
+
+/* Remove an adjectival ending: an adjective ending (r_adjective), optionally
+   preceded by a participle ending from table a_2.
+   Returns 0 when no adjective ending is present, 1 otherwise.  The participle
+   part is a "try": when it does not apply, the cursor is put back to where
+   it stood right after the adjective ending was removed.
+
+   The two saved cursor positions have distinct names.  The generated code
+   called both of them "m", so the inner one shadowed the outer one and the
+   failure path of the try restored the cursor to the wrong position (after
+   the participle suffix instead of before it). */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m_keep = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m_keep; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m_keep; goto lab0; }
+            case 1:
+                /* result 1: only delete when preceded by s_2 or s_3 */
+                {   int m_or = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m_or;
+                    /* neither letter precedes: abandon the whole try */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_keep; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Remove a reflexive ending found in table a_3 just before the cursor.
+   Returns 1 when an ending matched (and was deleted), 0 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 168 */
+    among_var = find_among_b(z, a_3, 2); /* substring, line 168 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 168 */
+
+    if (among_var == 1)
+        slice_del(z); /* delete, line 171 */
+    return 1;
+}
+
+/* Remove a verb ending found in table a_4 just before the cursor.
+   Endings with result code 1 are removed only when preceded by the letter
+   s_4 or s_5; endings with result code 2 are removed unconditionally.
+   Returns 1 when something was deleted, 0 when nothing applied. */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 176 */
+    among_var = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 176 */
+
+    if (among_var == 1) {
+        int mark = z->l - z->c; /* or, line 182 */
+        if (!eq_s_b(z, 1, s_4)) {
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z); /* delete, line 182 */
+    } else if (among_var == 2) {
+        slice_del(z); /* delete, line 190 */
+    }
+    return 1;
+}
+
+/* Remove a noun ending found in table a_5 just before the cursor.
+   Returns 1 when an ending matched (and was deleted), 0 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 199 */
+    among_var = find_among_b(z, a_5, 36); /* substring, line 199 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 199 */
+
+    if (among_var == 1)
+        slice_del(z); /* delete, line 206 */
+    return 1;
+}
+
+/* Remove a derivational ending found in table a_6, but only when the ending
+   starts at or after the p2 mark (checked by r_R2).
+   Returns 1 when an ending matched inside that region, 0 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 215 */
+    among_var = find_among_b(z, a_6, 2); /* substring, line 215 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 215 */
+
+    if (!r_R2(z)) return 0; /* call R2, line 215 */
+
+    if (among_var == 1)
+        slice_del(z); /* delete, line 218 */
+    return 1;
+}
+
+/* Final clean-up driven by table a_7:
+     result 1 - delete the matched ending, then, if the two symbols before it
+                are both s_6/s_7, delete one of them;
+     result 2 - delete the matched symbol only when the symbol before it is
+                s_8;
+     result 3 - delete the matched symbol.
+   Returns 0 as soon as a required match fails (deletions already made are
+   kept), 1 otherwise. */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 223 */
+    among_var = find_among_b(z, a_7, 4); /* substring, line 223 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 223 */
+
+    if (among_var == 1) {
+        slice_del(z); /* delete, line 227 */
+        z->ket = z->c; /* [, line 228 */
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c; /* ], line 228 */
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z); /* delete, line 228 */
+    } else if (among_var == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z); /* delete, line 231 */
+    } else if (among_var == 3) {
+        slice_del(z); /* delete, line 233 */
+    }
+    return 1;
+}
+
+/* Stem the word held in z in place.
+   The region marks are computed first; everything else runs backwards from
+   the end of the word with the backward limit z->lb raised to the pV mark
+   (z->I[0]).  Saved cursor positions are kept as distances from z->l, so
+   they stay valid while deletions shorten the word.
+   Returns 0 if the cursor lies before pV when the limit is applied, and
+   1 otherwise. */
+extern int russian_stem(struct SN_env * z) {
+    int limit_mark;
+    int outer_lb;
+
+    {   int start = z->c; /* do, line 240 */
+        r_mark_regions(z); /* call mark_regions, line 240 */
+        z->c = start;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    limit_mark = z->l - z->c; /* setlimit, line 241 */
+    if (z->c < z->I[0]) return 0;
+    z->c = z->I[0]; /* tomark, line 241 */
+    outer_lb = z->lb; z->lb = z->c;
+    z->c = z->l - limit_mark;
+
+    {   int do_mark = z->l - z->c; /* do, line 242 */
+        int or_mark = z->l - z->c; /* or, line 243 */
+        if (!r_perfective_gerund(z)) { /* call perfective_gerund, line 243 */
+            z->c = z->l - or_mark;
+            {   int try_mark = z->l - z->c; /* try, line 244 */
+                if (!r_reflexive(z)) /* call reflexive, line 244 */
+                    z->c = z->l - try_mark;
+            }
+            {   int alt_mark = z->l - z->c; /* or, line 245 */
+                if (!r_adjectival(z)) { /* call adjectival, line 245 */
+                    z->c = z->l - alt_mark;
+                    if (!r_verb(z)) { /* call verb, line 245 */
+                        z->c = z->l - alt_mark;
+                        /* the outcome of the last alternative is not
+                           needed: the cursor is reset just below */
+                        (void) r_noun(z); /* call noun, line 245 */
+                    }
+                }
+            }
+        }
+        z->c = z->l - do_mark;
+    }
+
+    {   int try_mark = z->l - z->c; /* try, line 248 */
+        z->ket = z->c; /* [, line 248 */
+        if (eq_s_b(z, 1, s_9)) {
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        } else {
+            z->c = z->l - try_mark;
+        }
+    }
+
+    {   int do_mark = z->l - z->c; /* do, line 251 */
+        (void) r_derivational(z); /* call derivational, line 251 */
+        z->c = z->l - do_mark;
+    }
+
+    {   int do_mark = z->l - z->c; /* do, line 252 */
+        (void) r_tidy_up(z); /* call tidy_up, line 252 */
+        z->c = z->l - do_mark;
+    }
+
+    z->lb = outer_lb;
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create a stemming environment for russian_stem().
+   NOTE(review): the 2 presumably sizes the z->I[] array (the stemmer uses
+   I[0] and I[1]) - confirm against SN_create_env(). */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Dispose of an environment obtained from russian_create_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+#define unless(C) if(!(C))
+
+#define CREATE_SIZE 1
+
+/* Allocate a new symbol buffer.  The allocation holds a header of HEAD bytes
+   followed by CREATE_SIZE + 1 symbols; the returned pointer addresses the
+   first symbol.  Capacity and size are both initialised to CREATE_SIZE.
+   The result of malloc() is not checked. */
+extern symbol * create_s(void)
+{
+    char * raw = (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    symbol * buf = (symbol *) (HEAD + raw);
+
+    CAPACITY(buf) = CREATE_SIZE;
+    SET_SIZE(buf, CREATE_SIZE);
+    return buf;
+}
+
+/* Free a symbol buffer; the allocation starts HEAD bytes before p. */
+extern void lose_s(symbol * p)
+{
+    char * raw = (char *) p - HEAD;
+    free(raw);
+}
+
+/* Forward test: is the symbol at the cursor a member of the bitmap s, which
+   covers the code points min..max?  On success the cursor advances by one
+   and 1 is returned; otherwise 0 is returned and the cursor stays put. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    /* bit (ch & 7) of byte (ch >> 3) flags membership */
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: is the symbol just before the cursor a member of the bitmap
+   s (code points min..max)?  On success the cursor retreats by one and 1 is
+   returned; otherwise 0 is returned and the cursor stays put. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    /* bit (ch & 7) of byte (ch >> 3) flags membership */
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: is the symbol at the cursor NOT a member of the bitmap s
+   (code points min..max)?  On success the cursor advances by one and 1 is
+   returned; 0 is returned at the end of the text or for a member. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max) {
+        ch -= min;
+        /* inside the range and flagged in the bitmap: a member, so fail */
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward test: is the symbol just before the cursor NOT a member of the
+   bitmap s (code points min..max)?  On success the cursor retreats by one
+   and 1 is returned; 0 is returned at the backward limit or for a member. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max) {
+        ch -= min;
+        /* inside the range and flagged in the bitmap: a member, so fail */
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward test: does the symbol at the cursor lie within min..max?
+   On success the cursor advances by one and 1 is returned, else 0. */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch < min || ch > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: does the symbol just before the cursor lie within min..max?
+   On success the cursor retreats by one and 1 is returned, else 0. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch < min || ch > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: does the symbol at the cursor lie outside min..max?
+   On success the cursor advances by one and 1 is returned, else 0. */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (min <= ch && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: does the symbol just before the cursor lie outside min..max?
+   On success the cursor retreats by one and 1 is returned, else 0. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (min <= ch && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward literal match: if the s_size symbols starting at the cursor equal
+   s, step the cursor over them and return 1; otherwise return 0. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    int available = z->l - z->c;
+
+    if (available < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward literal match: if the s_size symbols ending at the cursor equal
+   s, move the cursor back over them and return 1; otherwise return 0. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    int available = z->c - z->lb;
+
+    if (available < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward literal match against the symbol buffer p (length SIZE(p)). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s(z, len, p);
+}
+
+/* Backward literal match against the symbol buffer p (length SIZE(p)). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s_b(z, len, p);
+}
+
+/* Look up the text starting at the cursor in the table v (v_size entries).
+
+   Phase 1 is a binary search over v; it therefore presumes the entries are
+   ordered by their strings.  common_i / common_j record how many leading
+   symbols are already known to agree with the entries at the lower / upper
+   end of the current interval, so each comparison can skip that prefix.
+
+   Phase 2 starts from the entry the search settled on and follows the
+   substring_i links.  The first entry whose whole string matched, and whose
+   condition function (if any) returns non-zero, wins: the cursor is placed
+   just after the matched string and the entry's result is returned.
+   Returns 0 when the chain is exhausted without such an entry. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        /* compare the text with entry k, skipping the known common prefix;
+           running out of text counts as "text is smaller" */
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    /* walk the substring chain looking for a fully matched entry */
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the condition function may have moved the cursor */
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing: the text is read from the symbol
+   just before the cursor towards z->lb, and each entry's string is compared
+   from its last symbol to its first.
+
+   The first loop is a binary search over v (which therefore presumes the
+   entries are ordered accordingly); common_i / common_j count the trailing
+   symbols already known to agree at the two ends of the interval.  The
+   second loop follows the substring_i links from the entry found: the first
+   entry that matched in full, and whose condition function (if any) returns
+   non-zero, wins.  The cursor is then left just before the matched string
+   and the entry's result is returned; 0 is returned when no entry applies. */
+
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        /* compare right-to-left, skipping the known common tail; hitting
+           the backward limit counts as "text is smaller" */
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            /* go round once more so that the first entry gets inspected */
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    /* walk the substring chain looking for a fully matched entry */
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the condition function may have moved the cursor */
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Move p into a new buffer with room for n + 20 symbols.
+   CAPACITY(p) symbols are copied across, the old buffer is released and the
+   new one is returned with its capacity recorded.  The size field of the new
+   buffer is not set here.  The result of malloc() is not checked. */
+extern symbol * increase_size(symbol * p, int n)
+{
+    int capacity = n + 20;
+    char * raw = (char *) malloc(HEAD + (capacity + 1) * sizeof(symbol));
+    symbol * grown = (symbol *) (HEAD + raw);
+
+    CAPACITY(grown) = capacity;
+    memmove(grown, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return grown;
+}
+
+/* Replace the symbols of z->p lying between c_bra and c_ket by the s_size
+   symbols at s.
+
+   When the length changes, the buffer is grown if necessary, the tail is
+   shifted, and SIZE(z->p), z->l and the cursor are updated: a cursor at or
+   beyond c_ket moves with the tail, a cursor strictly inside the replaced
+   span is pulled back to c_bra.  Returns the change in length.
+*/
+
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{
+    int old_len = SIZE(z->p);
+    int delta = s_size - (c_ket - c_bra);
+
+    if (delta != 0)
+    {
+        int new_len = delta + old_len;
+
+        if (new_len > CAPACITY(z->p))
+            z->p = increase_size(z->p, new_len);
+        /* slide everything after the replaced span to its new place */
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        if (z->c >= c_ket)
+            z->c += delta;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0)
+        memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/* Sanity check before a slice operation: requires
+   0 <= bra <= ket <= l <= SIZE(p).  On violation a message goes to stderr,
+   the buffer is dumped through debug() and the process exits with status 1. */
+static void slice_check(struct SN_env * z)
+{
+    int broken = z->bra < 0 ||
+                 z->bra > z->ket ||
+                 z->ket > z->l ||
+                 z->l > SIZE(z->p);   /* this last test could be removed */
+
+    if (broken)
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+/* Replace the current slice z->bra..z->ket by the s_size symbols at s,
+   after validating the slice bounds. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the contents of the symbol buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    slice_from_s(z, len, p);
+}
+
+/* Delete the current slice, i.e. replace it by an empty string. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, 0);
+}
+
+/* Replace the symbols between bra and ket by the s_size symbols at s, then
+   shift z->bra and z->ket by the length change when they lie at or after
+   the start of the edited span. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* Same as insert_s(), taking the replacement text and its length from the
+   symbol buffer p. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice z->bra..z->ket into p, growing p first when its
+   capacity is too small, and record the new size.  Returns p, which may
+   have been reallocated. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int len;
+
+    slice_check(z);
+    len = z->ket - z->bra;
+    if (len > CAPACITY(p))
+        p = increase_size(p, len);
+    memmove(p, z->p + z->bra, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into p, growing p first when its
+   capacity is too small, and record the new size.  Returns p, which may
+   have been reallocated. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int total = z->l;
+
+    if (total > CAPACITY(p))
+        p = increase_size(p, total);
+    memmove(p, z->p, total * sizeof(symbol));
+    SET_SIZE(p, total);
+    return p;
+}
+
+/* Dump the buffer to stdout for diagnosis.
+   When number >= 0 a prefix with number, line_count and the buffer size is
+   printed first.  The symbols follow, with '{' at z->lb, '[' at z->bra,
+   '|' at the cursor, ']' at z->ket and '}' at z->l; zero symbols are shown
+   as '#'.  The line always ends with a closing quote. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{
+    int pos;
+    int limit = SIZE(z->p);
+
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+
+    /* pos runs one past the last symbol so that markers sitting at the very
+       end of the buffer are still printed */
+    for (pos = 0; pos <= limit; pos++)
+    {
+        if (z->lb == pos) printf("{");
+        if (z->bra == pos) printf("[");
+        if (z->c == pos) printf("|");
+        if (z->ket == pos) printf("]");
+        if (z->l == pos) printf("}");
+        if (pos < limit)
+        {
+            int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Convert a NUL-terminated string to lower case in place using tolower().
+ * Each byte is treated as unsigned char before the call.  Returns str.
+ */
+char*
+lowerstr(char *str) {
+   unsigned char *cur;
+
+   for(cur=(unsigned char*)str; *cur!='\0'; cur++)
+       *cur = (unsigned char)tolower(*cur);
+   return str;
+}
+
+/*
+ * Release a stop list: every word, then the pointer array, and finally
+ * zero the whole StopList structure (which also clears s->wordop).
+ *
+ * The array is freed exactly once, after the loop over its entries.
+ * Freeing it inside the loop would leave the loop reading freed memory and
+ * would free the array again on every further pass.
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       /* s->len bounds the walk, so nothing past the last entry is read */
+       while( s->len >0 ) {
+           free(*ptr);
+           ptr++; s->len--;
+       }
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/* Free a partially built word array: its first len strings, then the array. */
+static void
+dropstopwords(char **stop, int len) {
+   while( len>0 )
+       free(stop[--len]);
+   free(stop); /* free(NULL) is a no-op */
+}
+
+/*
+ * Load a stop-word file into s.
+ *
+ * in  - text value holding the file name; NULL or empty yields an empty list
+ * s   - list to fill; s->len is reset and s->stop is overwritten (any list
+ *       previously stored there is not freed here); s->wordop, when set,
+ *       is applied to every word read
+ *
+ * One word is read per line, empty lines are skipped, and lines longer
+ * than STOPBUFLEN-1 bytes are split by fgets().  Words and the pointer
+ * array are allocated with strdup()/realloc().  Raises ERROR when the
+ * file cannot be opened or memory runs out; in the latter case everything
+ * read so far is released first.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t  buflen=strlen(buf);
+
+           /*
+            * Strip the newline only when there is one: the last line of
+            * the file, or an over-long line, must not lose a character.
+            */
+           if ( buflen>0 && buf[buflen-1]=='\n' )
+               buf[buflen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /* realloc failure leaves the old array valid: free it */
+                   dropstopwords(stop, s->len);
+                   s->len=0;
+                   fclose(hin); 
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+    
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               dropstopwords(stop, s->len);
+               s->len=0;
+               fclose(hin); 
+               elog(ERROR,"Not enough memory");
+           }
+           if ( s->wordop ) 
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++; 
+       }
+       fclose(hin);
+       pfree(filename); 
+   }
+   s->stop=stop;
+} 
+
+/* qsort()/bsearch() comparator: a and b each point to a char* string. */
+static int
+comparestr(const void *a, const void *b) {
+   const char *left = *(char**)a;
+   const char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/* Sort the words of a non-empty stop list with qsort() and comparestr(). */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || !(s->len>0) )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is found in the stop list.  s->wordop, when set, is
+ * applied to key first.  The lookup uses bsearch() with comparestr().
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop ) 
+       key=(*(s->wordop))(key);
+
+   if ( !s->stop || !(s->len>0) )
+       return false;
+
+   if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
+       return true;
+   return false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the configuration whose pg_ts_cfg row has oid = id.
+ *
+ * Step 1 reads prs_name for the configuration and copies it into
+ * TopMemoryContext.  Step 2 reads (tokid, dict_name[]) pairs from
+ * pg_ts_cfgmap, ordered by tokid descending, and builds cfg->map indexed
+ * by tokid; dictionary names are stored temporarily as text copies.
+ * Step 3, after SPI_finish(), replaces the parser name and every
+ * dictionary name by its id via name2id_prs() / name2id_dict().
+ *
+ * cfg->map and the dict_id arrays are malloc()ed.  Raises ERROR through
+ * ts_error() on any failure.
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       /* a NULL prs_name would otherwise be dereferenced by ptextdup() */
+       if ( isnull )
+           ts_error(ERROR, "Null parser name for tsearch cfg with id %d", id);
+       /* copy outside the SPI context: the name is used after SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /*
+        * Rows arrive with the largest tokid first, so the first row
+        * determines the size of the map.
+        */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull now refers to column 2 (dict_name) */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* keep copies of the dictionary names until they are resolved below */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       /* free the detoasted copy, if detoasting made one */
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every stored dictionary name by the dictionary's id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Backend-local cache of loaded configurations.  findcfg() keeps "list"
+ * sorted by configuration id so it can be searched with bsearch().
+ */
+typedef struct {
+   /* entry returned by the most recent lookup, or NULL */
+   TSCfgInfo   *last_cfg;
+   /* number of entries of list in use */
+   int     len;
+   /* number of entries list has room for */
+   int     reallen;
+   /* realloc()ed array of configurations */
+   TSCfgInfo   *list;
+   /* configuration name -> id map used by name2id_cfg() */
+   SNMap       name2id_map;
+} CFGList;
+
+/* positional initializer: must follow the field order above */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name->id map, every cached
+ * configuration's dictionary map (with its dict_id arrays) and the list
+ * itself, then zero CList.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly rather than subtracted: the difference
+ * of two unsigned Oids converted to int has the wrong sign when the ids
+ * are more than 2^31 apart, which would break the ordering.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((TSCfgInfo*)a)->id;
+   Oid idb = ((TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached configuration with the given id, loading it with
+ * init_cfg() on first use.  The returned pointer refers to an element of
+ * CList.list and may be invalidated by a later call that grows or
+ * re-sorts the list.
+ *
+ * A new configuration is built in a local variable and only copied into
+ * the cache once init_cfg() has returned.  If init_cfg() raises an error
+ * the cache is therefore left without a half-initialised entry and
+ * CList.last_cfg does not point at one.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo key;
+   TSCfgInfo newcfg;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   key.id=id;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance: not cached yet, so make room and load it */
+   CList.last_cfg = NULL;
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+   init_cfg(id, &newcfg);
+   CList.list[CList.len]=newcfg;
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+
+   /* qsort changed the order, so look the new entry up again */
+   CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return CList.last_cfg;
+}
+
+
+/*
+ * Translate a configuration name into the oid of its pg_ts_cfg row.
+ * Results are cached in CList.name2id_map; a miss runs a saved SPI plan.
+ * Raises ERROR when no such configuration exists or its oid is NULL.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1];
+   Datum values[1];
+   bool isnull;
+   int rc;
+   Oid cfgid;
+
+   /* fast path: the name was resolved before */
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid ) 
+       return cfgid;
+
+   argtypes[0]=TEXTOID;
+   values[0]=PointerGetDatum(name);
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL ) 
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !(SPI_processed > 0) )
+       elog(ERROR, "No tsearch config");
+
+   cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull ) 
+       elog(ERROR, "Null id for tsearch config");
+
+   SPI_finish();
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Split buf (buflen bytes) into lexemes with the parser of cfg and append
+ * the normalised forms to prs->words.
+ *
+ * For every token the dictionaries mapped to its type are tried in order.
+ * The first one returning a non-NULL result ends the search; each string
+ * of that result becomes one WORD carrying the current position.  The
+ * strings themselves are kept (only the result array is pfree()d) and
+ * are referenced from prs->words.  Token types at or beyond cfg->len are
+ * skipped.  Raises ERROR for a token of MAXSTRLEN bytes or more.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an integer, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token to prs->words, doubling the array while it is full.
+ * The new entry is zeroed, then given the token type, its length and a
+ * palloc()ed copy of its buflen bytes (no terminating NUL is added).
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) ); 
+   slot->type = (uint8)type;
+   slot->len = buflen; 
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;    
+}
+
+/*
+ * Link the most recently added word (prs->words[curwords-1]) to every
+ * VAL item of the query whose operand equals buf[0..buflen).
+ *
+ * The first match is stored in the word itself.  Each further match
+ * appends a copy of the word flagged "repeated" that points at the other
+ * item.  Room for query->size extra entries is reserved up front.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc() above may have moved the
+    * array, and a pointer obtained earlier would dangle.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Headline variant of parsetext_v2(): every token of buf is appended to
+ * prs->words verbatim via hladdword().  Its normalised forms, taken from
+ * the first dictionary that returns a non-NULL result, are then matched
+ * against the query with hlfinditem().  The normalised strings and the
+ * result array are pfree()d here.  Token types at or beyond cfg->len get
+ * no dictionary lookup.  Raises ERROR for a token of MAXSTRLEN bytes or
+ * more.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an integer, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Assemble the headline text from prs->words and free the word strings.
+ *
+ * Only words flagged "in" and neither "skip" nor "repeated" are emitted:
+ * a "replace" word becomes a single space, any other word is copied and,
+ * when "selected", wrapped in prs->startsel / prs->stopsel.  The word
+ * buffer of every non-repeated entry is pfree()d.  Returns a palloc()ed
+ * text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int len=128;
+   text *out=(text*)palloc( len );
+   char *ptr=((char*)out) + VARHDRSZ;
+   int idx;
+
+   for(idx=0; idx < prs->curwords; idx++) {
+       HLWORD  *wrd=&( prs->words[idx] );
+
+       /* grow until the word plus both markers is sure to fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
+           int used = ptr - ((char*)out);
+           len*= 2;
+           out = (text *) repalloc(out, len);
+           ptr=((char*)out) + used;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace ) {
+               *ptr++ = ' ';
+           } else {
+               if (wrd->selected) {
+                   memcpy(ptr,prs->startsel,prs->startsellen);
+                   ptr+=prs->startsellen;
+               }
+               memcpy(ptr,wrd->word,wrd->len);
+               ptr+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(ptr,prs->stopsel,prs->stopsellen);
+                   ptr+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries share their word buffer with the original */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+   }
+
+   VARATT_SIZEP(out)=ptr - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the id of the current configuration.  When none has been set
+ * yet it is looked up in pg_ts_cfg by the LC_CTYPE locale name and
+ * remembered in current_cfg_id.  Raises ERROR when no configuration
+ * matches the locale.
+ */
+int  
+get_currcfg(void) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1];
+   const char *curlocale;
+   bool isnull;
+   int rc;
+
+   /* already known */
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( plan_getcfg_bylocale == NULL ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( plan_getcfg_bylocale == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   curlocale = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)curlocale) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !(SPI_processed > 0) )
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current
+ * one.  findcfg() is called first so that an unknown id raises an error
+ * before current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current
+ * one by resolving the name with name2id_cfg() and calling set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Oid cfgid=name2id_cfg(name);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum(cfgid) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* SQL-callable: return the id reported by get_currcfg(). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   Oid cur=get_currcfg();
+
+   PG_RETURN_OID( cur );
+}
+
+/*
+ * SQL-callable: emit the "TSearch cache cleaned" notice via ts_error().
+ * NOTE(review): no cache is reset here directly; presumably ts_error()
+ * does the cleanup itself -- confirm in common.c.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS)
+{
+   ts_error(NOTICE,"TSearch cache cleaned");
+
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+/*
+ * Declarations for tsearch2 configurations: the cached configuration
+ * record and the work structures used while parsing a document into
+ * words (PRSTEXT) or into headline tokens (HLPRSTEXT).
+ */
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries assigned to one token type. */
+typedef struct {
+   /* number of entries in dict_id */
+   int len;
+   /* malloc()ed array; init_cfg() leaves dictionary ids (as Datum) in it */
+   Datum   *dict_id;
+} ListDictionary;
+
+/* One loaded configuration, as filled in by init_cfg(). */
+typedef struct {
+   /* oid of the pg_ts_cfg row */
+   Oid id;
+   /* id of the parser, resolved with name2id_prs() */
+   Oid prs_id;
+   /* number of entries in map (largest mapped token id + 1) */
+   int len;
+   /* malloc()ed array indexed by token type id */
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalised word produced by parsetext_v2(). */
+typedef struct {
+        /* length of word in bytes */
+        uint16          len;
+   /* parsetext_v2() stores a single position in pos.pos */
+   union {
+       uint16      pos;
+       /* NOTE(review): presumably an array of positions; alen looks like its size -- confirm against the tsvector code */
+       uint16      *apos;
+   } pos;
+        /* string returned by the dictionary */
+        char       *word;
+   /* set to 0 by parsetext_v2() */
+   uint32  alen;
+}       WORD;
+   
+/* Growing array of words for one document. */
+typedef struct {
+        WORD       *words;
+        /* number of entries words has room for */
+        int4            lenwords;
+        /* number of entries in use */
+        int4            curwords;
+   /* position counter, incremented for every token a dictionary recognised */
+   int4        pos;
+}       PRSTEXT;
+
+/* One token of the text a headline is generated from. */
+typedef struct {
+        /* length of word in bytes (word is not NUL-terminated) */
+        uint16    len;
+   /* selected: genhl() wraps the word in startsel/stopsel */
+   uint8    selected:1,
+   /* in: genhl() emits the word only when this is set */
+         in:1,
+   /* skip: genhl() leaves the word out */
+         skip:1,
+   /* replace: genhl() emits a single space instead of the word */
+         replace:1,
+   /* repeated: duplicate made by hlfinditem(); shares word with the original */
+         repeated:1;
+   /* token type reported by the parser */
+   uint8   type;
+        /* palloc()ed copy of the token text */
+        char      *word;
+   /* matching query item, or NULL when none matched */
+   ITEM      *item;
+}       HLWORD;
+   
+/* Growing array of headline tokens plus the highlight markers. */
+typedef struct {
+        HLWORD       *words;
+        /* number of entries words has room for */
+        int4            lenwords;
+        /* number of entries in use */
+        int4            curwords;
+        /* text inserted before / after a selected word, with byte lengths */
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+
+/*
+ * tsstat_in: input function of the tsstat type.
+ * The argument is ignored; the result is always an empty tsstat, i.e. just
+ * the header with size 0 and len equal to STATHDRSIZE.
+ */
+Datum
+tsstat_in(PG_FUNCTION_ARGS)
+{
+   tsstat     *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->len = STATHDRSIZE;
+   empty->size = 0;
+
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+
+/*
+ * tsstat_out: output function of the tsstat type.
+ * Not implemented: always raises an ERROR.
+ */
+Datum
+tsstat_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Unimplemented");
+
+   /* follows the elog(ERROR) call, as in the original */
+   PG_RETURN_NULL();
+}
+
+/*
+ * SEI_realloc: grow (or create) an array of WordEntry pointers.
+ *
+ *  in  - current array, or NULL if nothing was allocated yet
+ *  len - in/out: capacity of the array, in elements
+ *
+ * An existing array doubles its capacity; a missing one (in==NULL or
+ * *len==0) is allocated with room for 8 pointers. Returns the array to use
+ * from now on and stores the new capacity in *len.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len )
+{
+   if ( in!=NULL && *len!=0 ) {
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*)* (*len) );
+   }
+
+   *len=8;
+   return palloc( sizeof(WordEntry*)* (*len) );
+}
+
+/*
+ * compareStatWord: order a statistics entry against a tsvector entry.
+ *
+ *  a, stat - entry and the tsstat whose string area holds its word
+ *  b, txt  - entry and the tsvector whose string area holds its word
+ *
+ * Words are ordered by length first; words of equal length are ordered by
+ * strncmp() over that length. Returns <0, 0 or >0.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt)
+{
+   const char *statword;
+   const char *txtword;
+
+   /* different lengths decide the order without looking at the bytes */
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   statword = STATSTRPTR(stat) + a->pos;
+   txtword = STRPTR(txt) + b->pos;
+
+   return strncmp(statword, txtword, a->len);
+}
+
+/*
+ * formstat: build a new tsstat holding every entry of 'stat' plus 'len'
+ * words that are new to it.
+ *
+ *  stat  - existing statistics; only read here
+ *  txt   - tsvector the new words come from
+ *  entry - array of 'len' pointers to WordEntry items of 'txt'
+ *  len   - number of elements in 'entry'
+ *
+ * Returns a freshly palloc'd tsstat. The string area of 'stat' is copied
+ * unchanged to the front of the new string area, so the 'pos' offsets of the
+ * old entries stay valid; the new words are appended behind it.
+ *
+ * Each new entry starts with ndoc=1 and nentry=POSDATALEN(txt,word), or 1
+ * when that is 0.
+ *
+ * The merge relies on the entries of 'stat' and the 'entry' array both being
+ * ordered according to compareStatWord().
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* total length of the strings that will be appended */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings first; new strings go to curptr */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary search for its slot, copy both halves around it */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: linear merge of old entries and new words */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* old entries left over after the new words ran out (copies 0 bytes otherwise) */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* new words left over after the old entries ran out */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+/*
+ * ts_accum: fold one tsvector (argument 1) into word statistics (argument 0).
+ *
+ * For every word of the tsvector that is already present in the statistics,
+ * ndoc is incremented and nentry is increased by POSDATALEN(txt,word) (by 1
+ * when that is 0). These updates are made in place, in the tsstat that was
+ * passed in. Words that are not present yet are collected in 'newentry' and
+ * handed to formstat(), which builds a new, larger tsstat.
+ *
+ * Returns the (possibly freshly created) input tsstat when no new word was
+ * found, otherwise the tsstat built by formstat(). The old tsstat is not
+ * freed here (the pfree is commented out); ts_stat_sql() frees it itself.
+ *
+ * NOTE(review): argument 1 is detoasted before PG_ARGISNULL(1) is tested, so
+ * the NULL check below looks like it comes too late to protect the detoast
+ * call -- verify how callers guarantee a non-NULL argument.
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;    /* words of txt not yet present in stat */
+   uint32  len=0, cur=0;           /* capacity / used length of newentry */
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both arrays in parallel when the statistics are
+    * less than 100 times larger than the tsvector, otherwise binary-search
+    * the statistics once per word of the tsvector.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               /* known word: update its counters in place */
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word sorts before the current stat entry: it is new */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* everything left in txt after stat is exhausted is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* a hit leaves the loop via break with StopLow < StopHigh */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions below: a private copy of
+ * the statistics and the index of the next entry to return.
+ * NOTE(review): stat is declared tsvector* but ts_setup_firstcall() stores a
+ * copy of a tsstat in it -- looks like it should be tsstat*; confirm.
+ */
+typedef struct {
+   uint32  cur;
+   tsvector *stat;
+} StatStorage;
+
+/*
+ * ts_setup_firstcall: first-call initialisation shared by ts_accum_finish()
+ * and ts_stat().
+ *
+ * Inside funcctx->multi_call_memory_ctx it allocates a StatStorage, copies
+ * stat->len bytes of 'stat' into it, stores it in funcctx->user_fctx, and
+ * sets funcctx->slot and funcctx->attinmeta from the tuple descriptor of the
+ * relation named "statinfo". The previous memory context is restored before
+ * returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   StatStorage     *st;
+   
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   st=palloc( sizeof(StatStorage) );
+   st->cur=0;
+   /* private copy: the caller's 'stat' is not referenced afterwards */
+   st->stat=palloc( stat->len );
+   memcpy(st->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+
+/*
+ * ts_process_call: produce the next result row of a statistics scan.
+ *
+ * Reads the StatStorage from funcctx->user_fctx. While entries remain, it
+ * builds a three-column tuple from C strings (word, ndoc, nentry, in that
+ * order), advances the cursor and returns the tuple as a Datum. Once all
+ * entries were returned it frees the StatStorage and returns (Datum)0, which
+ * callers treat as "no more rows".
+ *
+ * NOTE(review): ndoc and nentry are uint32 but are printed with "%d".
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage     *st;
+   st=(StatStorage*)funcctx->user_fctx;
+
+   if ( st->cur < st->stat->size ) {
+       Datum result;
+       char* values[3];
+       char    ndoc[16];
+       char    nentry[16];
+       StatEntry *entry=STATPTR(st->stat) + st->cur;
+       HeapTuple    tuple;
+
+       values[1]=ndoc;
+       sprintf(ndoc,"%d",entry->ndoc);
+       values[2]=nentry;
+       sprintf(nentry,"%d",entry->nentry);
+       /* the stored word is not NUL-terminated: copy it and terminate the copy */
+       values[0]=palloc( entry->len+1 );
+       memcpy( values[0], STATSTRPTR(st->stat)+entry->pos, entry->len);
+       (values[0])[entry->len]='\0';
+
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       pfree(values[0]);
+       st->cur++;
+       return result;  
+   } else {
+       /* scan finished: release the state allocated by ts_setup_firstcall() */
+       pfree(st->stat);
+       pfree(st);
+   }
+   
+   return (Datum)0;
+}
+
+/*
+ * ts_accum_finish: set-returning function that turns an accumulated tsstat
+ * (argument 0) into rows.
+ *
+ * On the first call the tsstat is copied by ts_setup_firstcall(); every call
+ * then returns the next row from ts_process_call(), and the scan ends when
+ * that function returns (Datum)0.
+ */
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Oid of the tsvector type; InvalidOid until get_ti_Oid() has looked it up */
+static Oid tiOid=InvalidOid;
+
+/*
+ * get_ti_Oid: look up the Oid of the type named 'tsvector' in pg_type via
+ * SPI and store it in tiOid.
+ *
+ * Raises an ERROR when the query fails, when it returns no row, or when the
+ * Oid read from the first row is InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is a row count, so a "<0" test can never fire; without a
+    * row the access to SPI_tuptable->vals[0] below would read past the
+    * result set.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * ts_stat_sql: run the SQL query given in 'txt' and accumulate word
+ * statistics over its result.
+ *
+ * The query must return exactly one column of type tsvector, otherwise an
+ * ERROR is raised. Rows are fetched through an SPI cursor in batches of 100;
+ * every non-NULL value is folded into the statistics with ts_accum().
+ *
+ * Must be called with an open SPI connection. Returns a palloc'd tsstat,
+ * which is empty (size 0) when the query returns no non-NULL value.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from empty statistics */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       /* fold every row of the current batch into the statistics */
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum() returns either the same tsstat or a new one; drop the old one */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+/*
+ * ts_stat: set-returning function taking the text of an SQL query
+ * (argument 0) and returning word statistics over its result.
+ *
+ * On the first call it connects to SPI, lets ts_stat_sql() run the query and
+ * accumulate the statistics, hands them to ts_setup_firstcall() (which
+ * copies them into the multi-call memory context) and closes the SPI
+ * connection. Every call then returns the next row from ts_process_call().
+ *
+ * NOTE(review): the return value of SPI_connect() is not checked.
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       /* must run before SPI_finish(): it copies 'stat' out of the SPI call */
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one word inside a tsstat.
+ * len and pos locate the word: len bytes starting at offset pos of the
+ * string area (see STATSTRPTR). ndoc is incremented once per tsvector that
+ * contains the word; nentry accumulates the word's position count per
+ * tsvector (at least 1 for each).
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Variable-length container of word statistics.
+ * len is the total size in bytes, size the number of StatEntry items.
+ * data holds the StatEntry array immediately followed by the string area
+ * (see the STATPTR / STATSTRPTR macros).
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat:
+ *   STATHDRSIZE       - size of the fixed header (the len and size fields)
+ *   CALCSTATSIZE(x,l) - total size for x entries and l bytes of strings
+ *   STATPTR(x)        - start of the StatEntry array
+ *   STATSTRPTR(x)     - start of the string area (right after the array)
+ *   STATSTRSIZE(x)    - size of the string area in bytes
+ *
+ * Every macro argument is parenthesized so that expressions can be passed
+ * safely, and the header fields are read through tsstat, the type declared
+ * in this file, instead of through tsvector.
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of lexeme positions in the string and their lengths
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>          /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * Version-1 call convention registrations and prototypes for the
+ * SQL-callable functions defined in this file.
+ */
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+/* text -> tsvector conversion: by configuration id, current config, or name */
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+/* trigger function */
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending position.
+ *
+ * Equal positions compare as 0.  Reporting "greater" for equal keys made
+ * the ordering inconsistent (a > b and b > a at once), which qsort() does
+ * not permit.
+ */
+static int 
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l position entries starting at a and compact them in place so
+ * that adjacent entries with the same position collapse into one, keeping
+ * the larger weight.  Compaction stops once the kept entry has index
+ * MAXNUMPOS-1 or holds position MAXENTRYPOS-1.
+ *
+ * Returns the number of entries kept at the front of the array.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *last = a;     /* last entry kept so far */
+   WordEntryPos *scan;
+
+   if ( l == 1 )
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (scan = a + 1; scan - a < l; scan++) {
+       if ( scan->pos == last->pos ) {
+           /* same position again: only the weight may be upgraded */
+           if ( scan->weight > last->weight )
+               last->weight = scan->weight;
+           continue;
+       }
+
+       last++;
+       last->pos = scan->pos;
+       last->weight = scan->weight;
+       if ( last - a >= MAXNUMPOS-1 || last->pos == MAXENTRYPOS-1 )
+           break;
+   }
+
+   return last + 1 - a;
+}
+
+/* Word storage that compareentry() indexes into; set before calling qsort(). */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN items: shorter words sort first and
+ * words of equal length are ordered by strncmp() over their bytes in
+ * BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   WordEntryIN *ea = (WordEntryIN *) a;
+   WordEntryIN *eb = (WordEntryIN *) b;
+
+   if ( ea->entry.len != eb->entry.len )
+       return ( ea->entry.len > eb->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[ea->entry.pos],
+                  &BufferStr[eb->entry.pos],
+                  ea->entry.len);
+}
+
+/*
+ * Finish one output entry: de-duplicate its position list (if any) and
+ * return the number of bytes the entry occupies in the tsvector data area,
+ * i.e. the short-aligned word plus, when positions are present, the count
+ * slot and the positions themselves.
+ */
+static int4
+finishentry(WordEntryIN *e)
+{
+   int4        size = SHORTALIGN(e->entry.len);
+
+   if ( e->entry.haspos ) {
+       *(uint16*)(e->pos) = uniquePos( &(e->pos[1]), *(uint16*)(e->pos));
+       /* +1: the leading slot of the array stores the position count */
+       size += (*(uint16*)(e->pos) + 1) * sizeof(WordEntryPos);
+   }
+   return size;
+}
+
+/*
+ * Sort the l parsed entries in a and merge duplicates in place.
+ *
+ * a          entries; entry.pos/entry.len address the word inside buf and
+ *            pos, when entry.haspos is set, is a palloc'd array whose
+ *            first slot (read as uint16) is the number of positions
+ * l          number of entries
+ * buf        buffer holding the words
+ * outbuflen  output only: set to the exact number of bytes needed for the
+ *            words and position lists of the surviving entries
+ *
+ * Position lists of equal words are concatenated and then de-duplicated
+ * by uniquePos().  Returns the number of distinct entries, which occupy
+ * the front of a.
+ *
+ * The size is computed from scratch.  Previously the multi-entry path
+ * added onto whatever the caller passed in and left out the count slot of
+ * every position list, so the result was only large enough thanks to the
+ * caller's stale value.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+   int4        total = 0;
+
+   res = a;
+   if (l == 1) {
+       *outbuflen = finishentry(a);
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* a new word starts: close the previous one and move on */
+           total += finishentry(res);
+           res++;
+           if ( res != ptr )   /* memcpy() must not be given overlapping areas */
+               memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           if ( res->entry.haspos ) {
+               /* append ptr's positions behind those already collected */
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* first occurrence had no positions: adopt the duplicate's list */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+   total += finishentry(res);
+
+   *outbuflen = total;
+   return res + 1 - a;
+}
+
+/*
+ * States of the gettoken_tsvector() parser:
+ *   WAITWORD      before a word, skipping spaces
+ *   WAITENDWORD   inside an unquoted word
+ *   WAITNEXTCHAR  a backslash was seen, the next character is taken literally
+ *   WAITENDCMPLX  inside a single-quoted word
+ *   WAITPOSINFO   after the closing quote, a ':' may introduce positions
+ *   INPOSINFO     a digit starting a position is required
+ *   WAITPOSDELIM  inside/after a position: digits, a weight mark or ','
+ */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Double the word buffer of the local variable "state" when fewer than two
+ * free bytes remain after curpos, keeping curpos at the same offset in the
+ * reallocated buffer.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Extract the next lexeme from state->prsbuf.
+ *
+ * The word is copied, NUL-terminated, into state->word (grown on demand)
+ * and state->curpos is left on the terminator.  When state->oprisdelim is
+ * false, an optional ":pos[weight],..." list is parsed into a freshly
+ * palloc'd state->pos array whose first slot, read as uint16, holds the
+ * number of positions; state->alen is non-zero only if such a list was
+ * seen.  When state->oprisdelim is true, ':' and characters accepted by
+ * ISOPERATOR() end the word instead.
+ *
+ * Returns 1 when a lexeme was read and 0 at the end of the input.
+ * Malformed input is reported through elog(ERROR).
+ *
+ * Input bytes are cast to unsigned char before being passed to the
+ * <ctype.h> functions, whose behaviour is undefined for negative values
+ * (bytes >= 0x80 where char is signed).
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;   /* state to resume after an escaped character */
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               /* the delimiter is not consumed here */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* step over the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               /* slot 0 is the counter; the new position goes to slot [count] */
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+           /* remaining digits of the current position are simply skipped */
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type.
+ *
+ * Lexemes are read with gettoken_tsvector() and collected in a temporary
+ * WordEntryIN array, with their bytes in a growable scratch buffer.  The
+ * array is then sorted and de-duplicated by uniqueentry() and the result
+ * is laid out as: header, WordEntry array, then for every entry the
+ * short-aligned word followed by its position list (count slot first).
+ *
+ * Errors: "Word is too long" when a word does not fit the WordEntry.len
+ * field, "Too long value" when a word's offset does not fit
+ * WordEntry.pos.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       /* make room for the new word in the scratch buffer */
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /*
+        * The offset is stored in a 20-bit field, so MAXSTRPOS (1<<20)
+        * itself is already out of range; ">" let it wrap around to 0.
+        */
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           /* ownership of the position array moves to the entry */
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* empty value: no data area, not the scratch size */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       /* from here on entry.pos is the offset inside the result's data area */
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* count slot plus the positions themselves */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * SQL-callable: returns the size field of the tsvector argument, i.e. the
+ * number of entries it holds.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the field before the detoasted copy may be released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type.
+ *
+ * Every entry is printed as a single-quoted word, entries are separated
+ * by one space, and an entry with positions is followed by ':' and a
+ * comma-separated list of positions, each optionally suffixed with a
+ * weight letter (A, B or C; weight 0 prints nothing).  A quote inside a
+ * word is preceded by a backslash.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+       /*
+        * Estimate the output size: two quotes per entry, the separating
+        * spaces, twice the word length, and 7 bytes per position.
+        */
+       lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+       for (i = 0; i < out->size; i++) {
+               lenbuf += ptr[i].len*2 /*for escape */;
+               if ( ptr[i].haspos )
+                       lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+       }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'')
+           {
+               /* grow by one byte for the backslash, keeping the write offset */
+               int4        pos = curout - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               /* print the number, then continue right after it */
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD items: by length, then by the bytes of the
+ * word, then by position.
+ *
+ * Items equal in all three keys compare as 0.  The former code answered
+ * -1 in that case whichever way the arguments were passed, which is not a
+ * consistent ordering for qsort().
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int         res;
+
+   if (wa->len != wb->len)
+       return (wa->len > wb->len) ? 1 : -1;
+
+   res = strncmp(wa->word, wb->word, wb->len);
+   if ( res != 0 )
+       return res;
+
+   if ( wa->pos.pos == wb->pos.pos )
+       return 0;
+   return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l words in a and merge equal words in place.
+ *
+ * Every surviving word gets a palloc'd pos.apos array in which apos[0] is
+ * the number of positions and apos[1..] are the positions, each passed
+ * through LIMITPOS().  The word string of a merged duplicate is pfree'd.
+ * No position is appended once apos[0] has reached MAXNUMPOS-1 or the last
+ * stored position is MAXENTRYPOS-1.
+ *
+ * Returns the number of distinct words, which occupy the front of a.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *kept = a,       /* last distinct word so far */
+              *scan;
+   int firstpos;
+
+   if (l != 1)
+       qsort((void *) a, l, sizeof(WORD), compareWORD);
+
+   /* the first word always survives: turn its position into a 1-element list */
+   firstpos = LIMITPOS(a->pos.pos);
+   a->alen = 2;
+   a->pos.apos = (uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0] = 1;
+   a->pos.apos[1] = firstpos;
+
+   if (l == 1)
+       return l;
+
+   for (scan = a + 1; scan - a < l; scan++)
+   {
+       if (scan->len == kept->len &&
+           strncmp(scan->word, kept->word, kept->len) == 0)
+       {
+           /* duplicate: only its position is of interest */
+           pfree(scan->word);
+           if ( !(kept->pos.apos[0] < MAXNUMPOS-1) ||
+                kept->pos.apos[ kept->pos.apos[0] ] == MAXENTRYPOS-1 )
+               continue;
+
+           if ( kept->pos.apos[0]+1 >= kept->alen ) {
+               kept->alen *= 2;
+               kept->pos.apos = (uint16*)repalloc( kept->pos.apos, sizeof(uint16)*kept->alen );
+           }
+           kept->pos.apos[ kept->pos.apos[0]+1 ] = LIMITPOS(scan->pos.pos);
+           kept->pos.apos[0]++;
+           continue;
+       }
+
+       /* a new word: move it down and start its position list */
+       kept++;
+       kept->len = scan->len;
+       kept->word = scan->word;
+       /* read the position first, pos.pos and pos.apos are accessed on the same item */
+       firstpos = LIMITPOS(scan->pos.pos);
+       kept->alen = 2;
+       kept->pos.apos = (uint16*)palloc( sizeof(uint16)*kept->alen );
+       kept->pos.apos[0] = 1;
+       kept->pos.apos[1] = firstpos;
+   }
+
+   return kept + 1 - a;
+}
+
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are sorted and de-duplicated with uniqueWORD(), then laid out
+ * as: header, WordEntry array, and for every word its short-aligned bytes
+ * followed by a uint16 position count and the positions (all with weight
+ * 0).  The word strings, the position arrays and prs->words itself are
+ * pfree'd; prs->curwords is updated to the number of distinct words.
+ *
+ * Raises "Value is too big" when a word's offset does not fit the
+ * WordEntry.pos field.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   /* first pass: size of the data area */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /*
+        * The offset is stored in a 20-bit field, so MAXSTRPOS (1<<20)
+        * itself is already out of range; ">" let it wrap around to 0.
+        */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* the count goes first, POSDATAPTR() addresses the slots behind it */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * SQL-callable: convert text (argument 1) to a tsvector using the
+ * configuration whose id is argument 0.  Text that yields no words gives
+ * a header-only tsvector with size 0.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *txt = PG_GETARG_TEXT_P(1);
+   PRSTEXT     prs;
+   tsvector   *result = NULL;
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+
+   /* start with room for 32 words */
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+   PG_FREE_IF_COPY(txt, 1);
+
+   if (!prs.curwords)
+   {
+       /* nothing parsed: release the word array and return an empty value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   }
+   else
+       result = makevalue(&prs);   /* also frees prs.words */
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * SQL-callable: like to_tsvector(), but argument 0 is a configuration
+ * name that is mapped to an id with name2id_cfg().  Argument 1 is handed
+ * on unchanged.
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       result;
+
+   /* the third argument is a dummy, to_tsvector() reads only two */
+   result = DirectFunctionCall3(to_tsvector,
+                                cfgid,
+                                PG_GETARG_DATUM(1),
+                                (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * SQL-callable: like to_tsvector(), using the configuration id returned
+ * by get_currcfg().  Argument 0 is the text and is handed on unchanged.
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       result;
+
+   /* the third argument is a dummy, to_tsvector() reads only two */
+   result = DirectFunctionCall3(to_tsvector,
+                                cfgid,
+                                PG_GETARG_DATUM(0),
+                                (Datum)0);
+
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * Look up a one-argument function called fname and return the oid of the
+ * first candidate whose argument type is TEXTOID, or InvalidOid when
+ * there is none.  The candidate list is pfree'd in any case.
+ */
+static Oid
+findFunc(char *fname) {
+   List *qualname = makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(qualname, 1);
+   Oid found = InvalidOid;
+
+   freeList(qualname);
+
+   while ( cand ) {
+       FuncCandidateList next = cand->next;
+
+       /* remember only the first match, but keep walking to free the rest */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger function keeping a tsvector column up to date.
+ *
+ * Trigger arguments: tgargs[0] names the tsvector column to fill; every
+ * further argument names either a column of the table or, when no such
+ * column exists, a function that is looked up with findFunc().  Columns
+ * of type text, varchar or char are parsed with the current
+ * configuration; once a function has been named, it is called on the
+ * value of every following column and its result is parsed instead.
+ *
+ * Must be fired BEFORE INSERT or UPDATE, FOR EACH ROW; anything else is
+ * rejected with elog(ERROR).  Returns the modified tuple.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * If detoasting made no copy, remember the function result in
+            * txt_toasted so that the comparison below does not pfree it.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when it is a detoasted copy */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a header-only tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Directory entry for one word, packed into 32 bits:
+ *   haspos  set when position data is stored for the word
+ *   len     length of the word in bytes
+ *   pos     offset of the word inside the data area
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* 1 << field width, i.e. one more than the largest value the field can hold */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a word, packed into 16 bits: a 2-bit weight and a
+ * 14-bit position.
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+/* 1 << width of the pos field */
+#define MAXENTRYPOS    (1<<14)
+/* limit on the number of positions kept per word */
+#define MAXNUMPOS  256
+/* clamp a position to the largest value the pos field can hold */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * Stored form of a tsvector value: a two-field header followed by the
+ * WordEntry array and the data area (see ARRPTR and STRPTR).
+ */
+typedef struct
+{
+   int4        len;            /* total size of the value in bytes */
+   int4        size;           /* number of WordEntry items */
+   char        data[1];        /* start of the variable-length part */
+}  tsvector;
+
+/*
+ * Layout helpers for a tsvector value x.  All macro arguments are
+ * parenthesized so that expressions such as "cur - data" or "ptr + 1" can
+ * be passed safely.
+ */
+/* size of the two int4 header fields (len, size) */
+#define DATAHDRSIZE (sizeof(int4)*2)
+/* total size of a value with x entries and lenstr bytes of data area */
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+/* start of the WordEntry array */
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+/* start of the data area, right behind the WordEntry array */
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* size of the data area in bytes */
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* position data of entry e: a uint16 count behind the short-aligned word */
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+/* number of positions of entry e, 0 when it has none */
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+/* first WordEntryPos of entry e, behind the count */
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Entry as collected while parsing input: the packed WordEntry plus a
+ * separately allocated position array, valid when entry.haspos is set.
+ * The first slot of pos, read as uint16, holds the number of positions.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/* State of the gettoken_tsvector() parser */
+typedef struct
+{
+   char       *prsbuf;         /* current read position in the input */
+   char       *word;           /* buffer receiving the current word */
+   char       *curpos;         /* write position inside word */
+   int4        len;            /* allocated size of word */
+   int4        state;          /* current parser state */
+   int4        alen;           /* allocated slots in pos, 0 if no positions */
+   WordEntryPos    *pos;       /* positions of the current word */
+   bool        oprisdelim;     /* operator characters and ':' end a word */
+}  TI_IN_STATE;
+
+/* reads the next lexeme; returns 1 on success, 0 at the end of the input */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>          /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * Version-1 call convention registrations and prototypes for the
+ * SQL-callable functions defined in this file.
+ */
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * SQL-callable: return a copy of the tsvector argument without any
+ * position data.  Only the words are copied and every entry of the result
+ * has haspos = 0.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* the data area of the result holds the short-aligned words only */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * SQL-callable: return a copy of the tsvector (argument 0) in which every
+ * position carries the weight named by the character in argument 1:
+ * 'a' = 3, 'b' = 2, 'c' = 1, 'd' = 0, case-insensitively.  Any other
+ * character raises "Unknown weight".
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* tolower() is undefined for negative values, hence the cast */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;    
+   while(i--) {
+       /* POSDATALEN() is 0 for entries without position data */
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare the word of entry a (stored at ptra + a->pos) with the word of
+ * entry b (stored at ptrb + b->pos): shorter words sort first, words of
+ * equal length are ordered by strncmp().
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of entry srcptr (in src) to the position list of
+ * entry destptr (in dest), shifting each by maxpos and clamping it with
+ * LIMITPOS().  The weights are copied unchanged.  When destptr has no
+ * position data yet, its count is started at 0.
+ *
+ * Copying stops once the destination holds MAXNUMPOS positions or its
+ * last position has reached MAXENTRYPOS-1.
+ *
+ * Sets destptr->haspos when at least one position was appended and
+ * returns the number of positions appended.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS &&
+            ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+
+/*
+ * SQL-callable: concatenate two tsvectors.
+ *
+ * The entries of both arguments are merged in compareEntry() order.
+ * Positions taken from the second argument are shifted by the largest
+ * position found in the first one (see add_pos()); a word present in both
+ * arguments gets the positions of the first followed by the shifted
+ * positions of the second.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /*
+    * Allocate for the case that no word is shared; size and len are
+    * corrected once the merge is done.
+    */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* positions of in1 are copied verbatim, count included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* positions of in2 are shifted by maxpos */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same word in both arguments */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count slot is already accounted for above */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* rest of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* rest of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Shrink to the number of entries actually written; STRPTR() depends
+    * on size, so the data area has to be moved down if it changed.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- Be careful!
+-- This script drops all indexes, triggers and columns that use the types
+-- defined in tsearch2.sql (see the DROP ... CASCADE statements below).
+-- Everything runs between BEGIN and END, i.e. as one transaction.
+
+
+-- GiST operator class
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregates
+DROP AGGREGATE stat(tsvector);
+
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types, together with everything that depends on them
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- remaining functions
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the token id
+ * defined in deflex.h (LATWORD == 1 ... HTMLENTITY == 23). Entry 0 is an
+ * empty placeholder because token ids start at 1.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token id defined in
+ * deflex.h, in the same order as lex_descr[]. Entry 0 is an empty
+ * placeholder because token ids start at 1.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Token type ids returned by the default word parser.
+ *
+ * Remember: LASTNUM must stay equal to the highest token id below, and
+ * lex_descr[] / tok_alias[] in deflex.c must have one entry per id
+ * (plus the unused entry 0).
+ */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* per-token description and alias tables, defined in deflex.c */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Interface of the flex-generated word parser (parser.l).
+ * This header uses FILE but includes nothing itself, so the including
+ * file must already have pulled in the stdio declarations.
+ *
+ * NOTE(review): token and tokenlen are declared here without "extern", so
+ * every file including this header gets its own tentative definition and
+ * the build relies on the linker merging them. Consider making these
+ * "extern" and defining both in parser.l.
+ */
+
+/* text and length of the token most recently returned by tsearch2_yylex() */
+char      *token;
+int            tokenlen;
+/* returns the type id of the next token (see deflex.h) */
+int            tsearch2_yylex(void);
+/* start parsing the given buffer of the given length */
+void       start_parse_str(char *, int);
+/* start parsing a file; the int limits the bytes read (0 - no limit) */
+void       start_parse_fh(FILE *, int);
+/* release the scanner buffer after parsing */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: the scanner's fprintf() calls are
+ * redirected to ts_error(ERROR, ...); the file argument is dropped.
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* use the postgres allocation functions inside the scanner */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the current token */
+char *s     = NULL;  /* copy of the WHOLE match, returned for hyphenated words and URLs */
+
+YY_BUFFER_STATE buf = NULL; /* buffer being parsed; needed for parsing from a string */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next read from the filehandle */
+
+/*
+ * Redefine flex's YY_INPUT so that reads from a filehandle can be limited.
+ *
+ * The interactive branch reads character by character up to a newline,
+ * EOF or max_size. The other branch honours lrlimit:
+ *   lrlimit == 0  - report end of input (YY_NULL);
+ *   lrlimit  > 0  - read at most lrlimit bytes and decrease lrlimit;
+ *   lrlimit  < 0  - read up to max_size bytes, no limit.
+ * Note that the fread() is executed for both lrlimit > 0 and lrlimit < 0;
+ * despite its indentation it is not part of the inner "else".
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after parsing: drop the saved whole-match copy (if any) and
+ * delete the scanner buffer.
+ */
+void end_parse() {
+   if (s) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+} 
+
+/*
+ * Start parsing the first `limit` bytes of `str`.
+ * A buffer left over from a previous parse is released first.
+ */
+void start_parse_str(char* str, int limit) {
+   if (buf)
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+/*
+ * Start parsing from filehandle `fh`, reading at most `limit` bytes;
+ * a limit of 0 means unlimited read (lrlimit is set to -1).
+ * A buffer left over from a previous parse is released first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if (buf)
+       end_parse();
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+/* saved SPI plan used by init_prs(); prepared on first use */
+static void *plan_getparser=NULL;
+/* parser selected by set_curprs(); InvalidOid until one is chosen */
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser whose pg_ts_parser row has
+ * oid `id`.
+ *
+ * The row is read through SPI; the plan is prepared once and kept in
+ * plan_getparser. The prs_start, prs_nexttoken, prs_end and prs_headline
+ * columns are resolved into FmgrInfo structs allocated in TopMemoryContext,
+ * prs_lextype is stored as a plain oid. prs->prs_id is assigned last, so it
+ * stays 0 (from the memset) unless every lookup succeeded.
+ *
+ * Errors (SPI failure, no such parser) are reported through ts_error().
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       /* column 1: prs_start */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       /* column 2: prs_nexttoken */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       /* column 3: prs_end */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       /* column 4: prs_lextype, kept as an oid and called by oid later */
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       /* column 5: prs_headline */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned: print it with %u so large oids are not shown negative */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/* Process-wide cache of parsers already loaded by findprs() */
+typedef struct {
+   WParserInfo *last_prs;   /* entry returned by the latest lookup, or NULL */
+   int     len;             /* number of used entries in list */
+   int     reallen;         /* number of allocated entries in list */
+   WParserInfo *list;       /* entries, kept sorted by prs_id */
+   SNMap       name2id_map; /* parser name -> oid map used by name2id_prs() */
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget every cached parser: release the name->oid map and the parser
+ * array, then return PList to its all-zero initial state.
+ */
+void    
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL )
+       free(PList.list);
+
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is unsigned,
+ * so the difference converted to int has the wrong sign whenever the two
+ * ids are more than INT_MAX apart, which would give an inconsistent order.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   return ( ida > idb ) ? 1 : 0;
+}
+
+/*
+ * Return the cached WParserInfo of parser `id`, loading it with init_prs()
+ * on first use.
+ *
+ * Lookup order: the entry returned by the previous call, then a binary
+ * search of the sorted cache, and finally a fresh load that is appended to
+ * the array and sorted into place. The result points into PList.list, so a
+ * later call that loads another parser (realloc/qsort) may invalidate it.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo *slot;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       WParserInfo key;
+       key.prs_id=id;
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* last chance */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /*
+    * Load into the spare slot through a local pointer and keep last_prs
+    * cleared meanwhile. If init_prs() raises an error and does not return
+    * (presumably ts_error(ERROR) aborts - confirm), last_prs must not be
+    * left pointing at a slot that is not counted in PList.len: its prs_id
+    * is still 0, so a later findprs(InvalidOid) would hand it out.
+    */
+   PList.last_prs=NULL;
+   slot=&(PList.list[PList.len]);
+   init_prs(id, slot);
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return findprs(id); /* qsort changed order!! */
+}
+
+/* saved SPI plan used by name2id_prs(); prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * Names already resolved are answered from PList.name2id_map; otherwise the
+ * oid is looked up through SPI and added to the map. An unknown name is
+ * reported through ts_error().
+ */
+Oid
+name2id_prs(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   Oid id=findSNMap_t( &(PList.name2id_map), name );
+   
+   /* a non-zero id means the name was found in the map */
+   if ( id ) 
+       return id;
+   
+
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 )
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   SPI_finish();
+   /* remember the result for the next lookup of this name */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* Per-query state of the token_type* set-returning functions */
+typedef struct {
+   int     cur;        /* index of the next list entry to return */
+   LexDescr    *list;  /* token descriptions, terminated by an entry with lexid 0 */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type* functions.
+ *
+ * Allocates a TypeStorage in the multi-call memory context, fills its list
+ * by calling the lextype function of parser `prsid`, and prepares the slot
+ * and attribute metadata for rows of the "tokentype" relation type.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   TypeStorage     *st;
+   WParserInfo *prs = findprs(prsid); 
+
+   /* everything below must survive until the last call of the SRF */
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->cur=0;
+   st->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
+   );
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the token_type* functions.
+ *
+ * Returns the next (id, alias, description) row as a tuple Datum, or
+ * (Datum)0 once the entry with lexid 0 is reached (or there is no list);
+ * in that case the TypeStorage and its list are freed.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage     *st;
+
+   st=(TypeStorage*)funcctx->user_fctx;
+   if (  st->list && st->list[st->cur].lexid ) {
+       Datum result;
+       char* values[3];
+       char    txtid[16];
+       HeapTuple    tuple;
+
+       /* the tuple is built from C strings, so the id is printed first */
+       values[0]=txtid;
+       sprintf(txtid,"%d",st->list[st->cur].lexid);
+       values[1]=st->list[st->cur].alias;
+       values[2]=st->list[st->cur].descr;
+
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       /* alias and descr of this entry are no longer needed */
+       pfree(values[1]);
+       pfree(values[2]);
+       st->cur++;
+       return result;
+   } else {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+   }
+   return (Datum)0;
+}
+
+/*
+ * Set-returning function: one row per token type of the parser whose oid
+ * is argument 0.
+ */
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) { 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call() means there are no more rows */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Set-returning function: one row per token type of the parser whose name
+ * is argument 0.
+ */
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call() means there are no more rows */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Set-returning function: one row per token type of the current parser.
+ * If no parser has been selected yet, the one named "default" becomes
+ * current.
+ */
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call() means there are no more rows */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * Make the parser whose oid is argument 0 the current parser.
+ * findprs() is called first, so the parser is looked up (and loaded into
+ * the cache if necessary) before current_parser_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        Oid prsid = PG_GETARG_OID(0);
+
+        findprs(prsid);
+        current_parser_id = prsid;
+        PG_RETURN_VOID();
+}
+
+/*
+ * Make the parser whose name is argument 0 the current parser: the name is
+ * resolved to an oid and the work is delegated to set_curprs().
+ */
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *name = PG_GETARG_TEXT_P(0);
+        Oid   prsid = name2id_prs(name);
+
+        DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+/* One token produced by a parser */
+typedef struct {
+   int type;       /* token type id returned by the parser */
+   char    *lexem; /* NUL-terminated copy of the token text */
+} LexemEntry;
+
+/* Per-query state of the parse* set-returning functions */
+typedef struct {
+   int cur;            /* index of the next entry to return */
+   int len;            /* allocated entries while collecting, used entries afterwards */
+   LexemEntry  *list;  /* collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse* functions.
+ *
+ * Runs parser `prsid` over the whole of `txt` (start, nexttoken until it
+ * returns 0, end) and stores a NUL-terminated copy of every token together
+ * with its type in a PrsStorage allocated in the multi-call memory context.
+ * Also prepares the slot and attribute metadata for rows of the "tokenout"
+ * relation type.
+ *
+ * prsid is an Oid, like in setup_firstcall() and findprs(): every caller
+ * passes an oid, so it must not travel through a signed int.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   /* everything below must survive until the last call of the SRF */
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* the start function gets the text payload and its length in bytes */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* a token type of 0 ends the input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* grow the list by doubling */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* lex is not NUL-terminated at llen, so copy and terminate it */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from now on len is the number of collected tokens, cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the parse* functions.
+ *
+ * Returns the next (type id, lexeme) row as a tuple Datum, or (Datum)0
+ * once every collected token has been returned; in that case the
+ * PrsStorage and its list are freed.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *st;
+
+   st=(PrsStorage*)funcctx->user_fctx;
+   if (  st->cur < st->len ) {
+       Datum result;
+       char* values[2];
+       char    tid[16];
+       HeapTuple    tuple;
+
+       /* the tuple is built from C strings, so the type id is printed first */
+       values[0]=tid;
+       sprintf(tid,"%d",st->list[st->cur].type);
+       values[1]=st->list[st->cur].lexem;
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       /* the lexeme copy of this entry is no longer needed */
+       pfree(values[1]);
+       st->cur++;
+       return result;
+   } else {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+   }
+   return (Datum)0;
+}
+
+           
+
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose oid is argument 0 and return one row per token.
+ */
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from prs_process_call() means there are no more rows */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose name is argument 0 and return one row per token.
+ */
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from prs_process_call() means there are no more rows */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * Set-returning function: parse the text in argument 0 with the current
+ * parser and return one row per token. If no parser has been selected
+ * yet, the one named "default" becomes current.
+ */
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from prs_process_call() means there are no more rows */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Build a headline for a document.
+ *
+ * Arguments: 0 - oid of the configuration, 1 - document text, 2 - query,
+ * 3 - optional options text (may be absent or a NULL pointer).
+ * The text is parsed with hlparsetext(), the headline function of the
+ * configuration's parser is called on the result, and genhl() produces
+ * the returned text.
+ */
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   /* headline_byname/headline_current pass a NULL pointer when no options were given */
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /*
+    * NOTE(review): startsel/stopsel are zeroed by the memset above and are
+    * not assigned in this function; this assumes the calls above always
+    * set them to palloc'ed memory - confirm.
+    */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+/*
+ * Same as headline(), but the configuration is given by name in
+ * argument 0. The remaining arguments are passed through unchanged; a
+ * missing options argument is passed as a NULL pointer.
+ */
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg=PG_GETARG_TEXT_P(0);
+
+   Datum out=DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);   
+}
+
+/*
+ * Same as headline(), but uses the configuration returned by
+ * get_currcfg(). Arguments: 0 - document text, 1 - query, 2 - optional
+ * options text (passed on as a NULL pointer when absent).
+ */
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   PG_RETURN_DATUM(DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
+   ));
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* Description of one parser, filled in by init_prs() from pg_ts_parser */
+typedef struct {
+   Oid prs_id;                 /* oid of the pg_ts_parser row */
+   FmgrInfo start_info;        /* prs_start function */
+   FmgrInfo getlexeme_info;    /* prs_nexttoken function */
+   FmgrInfo end_info;          /* prs_end function */
+   FmgrInfo headline_info;     /* prs_headline function */
+   Oid lextype;                /* oid of the prs_lextype function */
+   void *prs;                  /* parser state returned by the start function */
+} WParserInfo;
+
+/* load parser `id` into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached description of parser `id`, loading it if needed */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* drop all cached parser information */
+void   reset_prs(void);
+
+
+/* One token type as reported by a parser's lextype function */
+typedef struct {
+   int lexid;      /* token type id; 0 terminates a list of LexDescr */
+   char    *alias; /* short name of the token type */
+   char    *descr; /* human-readable description */
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_lextype - describe the lexeme types of the default parser.
+ *
+ * Returns a palloc'ed array of LASTNUM+1 LexDescr entries.  Entries
+ * 0..LASTNUM-1 describe lexeme ids 1..LASTNUM, with alias and description
+ * copied from tok_alias[] and lex_descr[].  The last entry is a terminator
+ * with lexid 0.
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *descr=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   int i;
+
+   for(i=1;i<=LASTNUM;i++) {
+       descr[i-1].lexid = i;
+       descr[i-1].alias = pstrdup(tok_alias[i]);
+       descr[i-1].descr = pstrdup(lex_descr[i]);
+   }
+
+   /*
+    * Terminator entry.  palloc() does not zero memory, so clear the
+    * pointers too instead of leaving them uninitialised.
+    */
+   descr[LASTNUM].lexid=0;
+   descr[LASTNUM].alias=NULL;
+   descr[LASTNUM].descr=NULL;
+
+   PG_RETURN_POINTER(descr);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+/*
+ * prsd_start - begin parsing a buffer with the default parser.
+ *
+ * Argument 0 is a char pointer to the text and argument 1 its int32 length;
+ * both are handed to start_parse_str().  Always returns a NULL pointer.
+ */
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char *buf=(char*)PG_GETARG_POINTER(0);
+   int buflen=PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+/*
+ * prsd_getlexeme - fetch the next lexeme from the default parser.
+ *
+ * Argument 0 is not used.  Argument 1 (char **) receives the value of the
+ * global "token" and argument 2 (int *) the value of "tokenlen", both read
+ * after tsearch2_yylex() has run.  Returns the value of tsearch2_yylex()
+ * as int32.
+ */
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tokout=(char**)PG_GETARG_POINTER(1);
+   int *lenout=(int*)PG_GETARG_POINTER(2);
+   int  lextype;
+
+   lextype=tsearch2_yylex();
+
+   *tokout = token;
+   *lenout = tokenlen;
+
+   PG_RETURN_INT32(lextype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+/*
+ * prsd_end - finish a parse started with prsd_start().
+ * Calls end_parse(); the argument is not used.  Returns void.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse();
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-type classes used when building a headline.  The numbers are
+ * lexeme type ids of the default parser.
+ * NOTE(review): the meaning of each id is presumably given in
+ * wordparser/deflex.h -- confirm there before changing any of them.
+ * LEAVETOKEN, COMPLEXTOKEN and ENDPUNCTOKEN are not referenced in the
+ * visible part of this file.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+/* IDIGNORE is only used as a component of NOENDTOKEN below */
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* words of these types get their "replace" flag set by prsd_headline() */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* tokens that prsd_headline() does not count as words */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* tokens prsd_headline() tries not to end a headline on */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* Window of headline words handed to TS_execute() through checkval */
+typedef struct {
+   HLWORD  *words; /* first word of the window */
+   int len;        /* number of words in the window */
+} hlCheck;
+
+/*
+ * TS_execute() callback: true when query item "val" is attached to at
+ * least one word of the hlCheck window passed as checkval.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window=(hlCheck*)checkval;
+   int idx=0;
+
+   while( idx<window->len ) {
+       if ( window->words[idx].item==val )
+           return true;
+       idx++;
+   }
+   return false;
+}
+
+
+/*
+ * hlCover - find the next cover (a span of words that satisfies the query)
+ * in prs->words, starting the search at word index *p.
+ *
+ * Pass 1: for every VAL item of the query, find the first word at or after
+ * the start position that carries this item; *q becomes the largest such
+ * index.  Pass 2: for every VAL item, scan backwards from *q down to the
+ * start position; *p becomes the smallest index found.  The span [*p,*q]
+ * is then checked with TS_execute(); if the query is not satisfied the
+ * search is repeated from *p+1.
+ *
+ * Returns true with the cover in *p / *q, or false when there is no
+ * further cover.  Note that *q==0 is used as the "nothing found" marker,
+ * so a cover ending at word index 0 is reported as not found.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* pass 1: right border = furthest "first occurrence" of any item */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q==0 )
+       return false;
+
+   /* pass 2: left border = earliest "last occurrence" inside [pos,*q] */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* span does not satisfy the query: retry one word further */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+/*
+ * prsd_headline - choose the fragment of a parsed text to show as headline.
+ *
+ * Arguments (all passed as raw pointers):
+ *   0: HLPRSTEXT* - parsed text; the flags of its words[] and the
+ *      startsel/stopsel fields are filled in here
+ *   1: text*      - option string, may be NULL
+ *   2: QUERYTYPE* - query whose items are matched against words[].item
+ *
+ * Options are compared case-insensitively: MaxWords, MinWords, ShortWord,
+ * StartSel, StopSel.  Defaults are MinWords=15, MaxWords=35, ShortWord=3
+ * and empty StartSel/StopSel.  When options are given, an error is raised
+ * unless 0 < MinWords < MaxWords and ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is stretched or shrunk towards the
+ * MinWords..MaxWords range and the best one is kept; if there is no cover
+ * the headline is taken from the start of the text.
+ *
+ * Returns the HLPRSTEXT pointer it was given.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* from opt + start and end tag */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           /* i is one past the last counted word; restart from that word */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this fragment when it is the first one, when it has more
+        * query words and a good end, or when it has a good end and the
+        * current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: use the first min_words words of the text */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* mark the chosen fragment */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   if (!prs->startsel)
+       prs->startsel=pstrdup("");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+               for(p=0; p
+                   dist = abs( post[l].pos - ct[p].pos );
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw; 
+                       if ( !dist ) dist=MAXENTRYPOS;  
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res; 
+}
+
+/*
+ * calc_rank_or - rank of a tsvector against a query, combining every
+ * occurrence of every query operand.
+ *
+ * For each VAL item found in the tsvector, each position contributes
+ * wpos(position); contributions are combined as
+ *     res = 1 - (1 - res) * (1 - wpos)
+ * Entries without position data use the POSNULL placeholder instead.
+ * NOTE(review): wpos() is presumably a macro that reads the weight array
+ * "w"; its definition is not visible in this part of the file.
+ *
+ * Returns -1.0 when no query operand occurs in the tsvector.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           /* no positions stored: fall back to the placeholder list */
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * calc_rank - rank tsvector t against query q with weight array w.
+ *
+ * Returns 0.0 when either the tsvector or the query is empty.  Uses
+ * calc_rank_and() when the first query item is the '&' operator and
+ * calc_rank_or() otherwise; a negative result from either is replaced
+ * by 1e-20.
+ *
+ * method selects the normalization: 0 - none, 1 - divide by
+ * log(cnt_length(t)), 2 - divide by cnt_length(t).  Any other value
+ * raises an error.
+ * NOTE(review): with method 1 the divisor is log(1) == 0 if cnt_length()
+ * returns 1 -- confirm whether cnt_length() can return 1 or 0.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   if ( res < 0 )
+       res = 1e-20;
+
+        switch(method) {
+       case 0: break;
+       case 1: res /= log((float)cnt_length(t)); break;
+       case 2: res /= (float)cnt_length(t); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+        }
+
+   return res;
+}
+
+/*
+ * rank - SQL-callable ranking function with caller-supplied weights.
+ *
+ * Arguments:
+ *   0: float4 array of weights; must be one-dimensional and hold at least
+ *      lengthof(weights) elements
+ *   1: tsvector
+ *   2: query
+ *   3: optional int32 normalization method (read only when exactly four
+ *      arguments are passed; otherwise DEF_NORM_METHOD is used)
+ *
+ * A negative element of the array selects the built-in default from
+ * weights[] for that slot; an effective weight above 1.0 raises an error.
+ * Returns the float4 result of calc_rank().
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 ) 
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+       elog(ERROR,"Array of weight is too short");
+
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 ) 
+           elog(ERROR,"Weight out of range");
+   } 
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method); 
+
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank_def - SQL-callable ranking function using the built-in weights[].
+ *
+ * Arguments: 0 - tsvector, 1 - query, 2 - optional int32 normalization
+ * method (read only when exactly three arguments are passed; otherwise
+ * DEF_NORM_METHOD is used).  Returns the float4 result of calc_rank().
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *qry = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int norm=DEF_NORM_METHOD;
+   float score;
+
+   if ( PG_NARGS() == 3 )
+       norm=PG_GETARG_INT32(2);
+
+   score=calc_rank(weights, vec, qry, norm);
+
+   PG_FREE_IF_COPY(vec, 0);
+   PG_FREE_IF_COPY(qry, 1);
+   PG_RETURN_FLOAT4(score);
+}
+
+
+/* One occurrence of a query operand in the document */
+typedef struct {
+   ITEM    *item;  /* the query operand */
+   int32   pos;    /* position of the occurrence in the document */
+} DocRepresentation;
+
+/*
+ * qsort() comparator ordering DocRepresentation entries by position.
+ * Equal positions compare as 0: the comparator must be consistent
+ * (cmp(a,b) and cmp(b,a) cannot both be positive) for qsort() to have
+ * defined behaviour.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   const DocRepresentation *da=(const DocRepresentation *) a;
+   const DocRepresentation *db=(const DocRepresentation *) b;
+
+   if ( da->pos == db->pos )
+       return 0;
+   return ( da->pos > db->pos ) ? 1 : -1;
+}
+
+
+/* Window of document entries handed to TS_execute() through checkval */
+typedef struct {
+   DocRepresentation *doc; /* first entry of the window */
+   int len;                /* number of entries in the window */
+}  ChkDocR;
+
+/*
+ * TS_execute() callback: true when query item "val" occurs among the
+ * entries of the ChkDocR window passed as checkval.
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *window=(ChkDocR*)checkval;
+   int idx;
+
+   for(idx=0; idx<window->len; idx++)
+       if ( window->doc[idx].item == val )
+           return true;
+
+   return false;
+}
+
+
+/*
+ * Cover - find the next cover (a span of the document that satisfies the
+ * query) in the position-sorted array doc[0..len-1].
+ *
+ * *pos is the array index to start from and is advanced for the next
+ * call.  On success *p and *q receive the first and last word POSITION
+ * (not array index) of the cover.  The previous *q is used to detect a
+ * right border that was already examined, in which case the search is
+ * restarted one entry further.  *q==0 doubles as the "nothing found"
+ * marker.
+ *
+ * Returns true when a cover was found, false when there are no more.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* right border: furthest first occurrence of any operand */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               } 
+               break;
+           } 
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   } 
+
+   /* left border: earliest last occurrence inside [*pos,lastpos] */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+ 
+   if ( *p<=*q ) {
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       /* the next search starts right after the left border */
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { 
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ 
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q); 
+   }
+ 
+   return false;
+}
+
+/*
+ * get_docrep - build the list of all occurrences of the query's operands
+ * in tsvector txt, sorted by position.
+ *
+ * Each VAL item found in txt contributes one DocRepresentation entry per
+ * stored position; entries without position data use the POSNULL
+ * placeholder list.  The number of entries is stored in *doclen.
+ *
+ * Returns a palloc'ed array that the caller must pfree, or NULL when no
+ * operand occurs in txt (*doclen is 0 in that case).
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* make room for dimt more entries */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+   
+   if ( cur>0 ) {
+       if ( cur>1 ) 
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+   
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd - SQL-callable ranking function based on covers.
+ *
+ * Arguments: 0 - int32 K, 1 - tsvector, 2 - query, 3 - int32
+ * normalization method (read only when exactly four arguments are
+ * passed; otherwise DEF_NORM_METHOD is used).
+ *
+ * Each cover reported by Cover() adds 1.0 to the result when it spans at
+ * most K positions, and K/(q-p+1) otherwise.  A non-positive K is
+ * replaced by 4.  Returns 0.0 when no query operand occurs in the
+ * tsvector.  The normalization methods match calc_rank(): 0 - none,
+ * 1 - divide by log(cnt_length), 2 - divide by cnt_length.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len,cur;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;    
+   /* a cover wider than K positions contributes proportionally less */
+   while( Cover(doc, len, query, &cur, &p, &q) ) 
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+        switch(method) {
+       case 0: break;
+       case 1: res /= log((float)cnt_length(txt)); break;
+       case 2: res /= (float)cnt_length(txt); break;
+       default:
+       elog(ERROR,"Unknown normalization method: %d",method);
+        }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd_def - rank_cd() with K passed as -1.
+ *
+ * Arguments 0 and 1 are forwarded as rank_cd()'s arguments 1 and 2.  The
+ * normalization method is argument 2 when exactly three arguments are
+ * passed, DEF_NORM_METHOD otherwise.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum norm=Int32GetDatum(DEF_NORM_METHOD);
+   Datum score;
+
+   if ( PG_NARGS() == 3 )
+       norm=PG_GETARG_DATUM(2);
+
+   score=DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       norm
+   );
+
+   PG_RETURN_DATUM( score );
+}
+
+/**************debug*************/
+
+/* One word occurrence of the document, as printed by get_covers() */
+typedef struct {
+   char    *w;     /* word text (not NUL-terminated; see len) */
+   int2    len;    /* length of the word text */
+   int2    pos;    /* position of the occurrence */
+   int2    start;  /* number of the cover starting here, 0 if none */
+   int2    finish; /* number of the cover ending here, 0 if none */
+} DocWord;
+
+/*
+ * qsort() comparator ordering DocWord entries by position.  Equal
+ * positions compare as 0 so that the comparator is consistent, as
+ * qsort() requires.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   const DocWord *wa=(const DocWord *) a;
+   const DocWord *wb=(const DocWord *) b;
+
+   if ( wa->pos == wb->pos )
+       return 0;
+   return ( wa->pos > wb->pos ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers - debugging aid: print the document with its covers marked.
+ *
+ * Arguments: 0 - tsvector (every entry must carry position data,
+ * otherwise an error is raised), 1 - query.
+ *
+ * Returns a text value listing the words of the tsvector in position
+ * order, separated by spaces, with "{N " before the first word and "}N "
+ * after the last word of the N-th cover.  Returns an empty text when no
+ * query operand occurs in the tsvector.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* bounds are tested before dwptr is dereferenced */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include <float.h>
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/* Node of the binary tree form of a query */
+typedef struct NODE
+{
+   struct NODE *left;
+   struct NODE *right;
+   ITEM       *valnode;    /* item of the plain (array) form this node stands for */
+}  NODE;
+
+/*
+ * Build the tree form of a query from its plain (array) form.
+ *
+ * For an operator item the right operand is the item that follows it and
+ * the left operand sits in->left items further on; the '!' operator has
+ * a right operand only.  Any other item becomes a leaf.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *result = (NODE *) palloc(sizeof(NODE));
+
+   result->valnode = in;
+   result->left = NULL;
+   result->right = NULL;
+
+   if (in->type != OPR)
+       return result;
+
+   result->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       result->left = maketree(in + in->left);
+
+   return result;
+}
+
+/* Growable output buffer used while flattening a tree */
+typedef struct
+{
+   ITEM       *ptr;        /* output array */
+   int4        len;        /* allocated number of items */
+   int4        cur;        /* number of items written so far */
+}  PLAINTREE;
+
+/*
+ * Append the subtree rooted at node to state in plain (array) form and
+ * free the node.  An operator is written first, followed by its right
+ * operand and then, for binary operators, its left operand; the
+ * operator's "left" field is set to the distance to the left operand
+ * (1 for '!', matching what maketree() expects for the right operand).
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   /* double the buffer when it is full */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+   memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM));
+   if (node->valnode->type == VAL)
+       state->cur++;
+   else if (node->valnode->val == (int4) '!')
+   {
+       state->ptr[state->cur].left = 1;
+       state->cur++;
+       plainnode(state, node->right);
+   }
+   else
+   {
+       int4        cur = state->cur;
+
+       state->cur++;
+       plainnode(state, node->right);
+       /* index through state->ptr: the recursion may have repalloc'ed it */
+       state->ptr[cur].left = state->cur - cur;
+       plainnode(state, node->left);
+   }
+   pfree(node);
+}
+
+/*
+ * Convert a query tree back to its plain (array) form.
+ *
+ * Returns a palloc'ed ITEM array and stores its length in *len.  When
+ * root is NULL, or its item is neither VAL nor OPR, NULL is returned
+ * and *len is set to 0.  The tree nodes are freed by plainnode().
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   state;
+
+   state.ptr = NULL;
+   state.cur = 0;
+   state.len = 16;
+
+   if (root != NULL &&
+       (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       state.ptr = (ITEM *) palloc(state.len * sizeof(ITEM));
+       plainnode(&state, root);
+   }
+
+   *len = state.cur;
+   return state.ptr;
+}
+
+/*
+ * Free a tree and all of its descendants; a NULL node is accepted.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive calls return at once for NULL children */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Clean the tree of ! operators.
+ * This is useful for debugging, and the resulting view is also used
+ * for searching in an index.
+ * Operator ! is treated as always returning TRUE.
+ *
+ * Returns the cleaned subtree, or NULL when the whole subtree was
+ * removed; removed nodes are freed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* an OR with a removed operand is removed as a whole */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       NODE       *res = node;
+
+       /* an AND with one removed operand collapses to the other one */
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a copy of the plain query at ptr with the ! operators cleaned
+ * out by clean_NOT_intree().  The length of the result is stored in
+ * *len; the result is NULL with *len 0 when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *cleaned = clean_NOT_intree(maketree(ptr));
+
+   return plaintree(cleaned, len);
+}
+
+/* Constant-folding state of a subtree, reported through *result */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Clean query tree from values which is always in
+ * text (stopword)
+ *
+ * VALTRUE leaves are removed and reported as V_TRUE; operators whose
+ * outcome no longer depends on a real value are folded away.  Returns
+ * the cleaned subtree, or NULL when the subtree folded to a constant,
+ * in which case *result holds V_TRUE or V_FALSE.  *result is left
+ * untouched when a subtree is returned.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* negation of a constant is the opposite constant */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           /* FALSE | x  ==  x */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       /* operator & */
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           /* TRUE & x  ==  x */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a copy of the plain query at ptr with the always-true values
+ * removed by clean_fakeval_intree().  The length of the result is stored
+ * in *len.  When the whole query folds to a constant a NOTICE is emitted
+ * and NULL is returned with *len set to 0.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *cleaned;
+
+   cleaned = clean_fakeval_intree(maketree(ptr), &verdict);
+   if (verdict == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/*
+ * Query rewriting (rewrite.c).  Both functions take a query in plain
+ * (array) form, return a newly allocated plain form and store its
+ * length in *len; the result may be NULL with *len set to 0.
+ */
+/* remove the ! operators from the query */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+/* remove values that are always true (stopwords) from the query */
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/* qsort()/bsearch() comparator: order SNMapEntry items by key (strcmp) */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *ea=(const SNMapEntry*)a;
+   const SNMapEntry *eb=(const SNMapEntry*)b;
+
+   return strcmp( ea->key, eb->key );
+}
+
+/*
+ * Add a key/value pair to the map.
+ *
+ * The key is copied with strdup() and the entry array is grown with
+ * realloc() (16 entries at first, doubling afterwards), so the map lives
+ * in malloc'ed memory.  The array is re-sorted after every insertion so
+ * that findSNMap() can use bsearch().  Raises an error when memory runs
+ * out.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if (map->len>=map->reallen) {
+       SNMapEntry *grown;
+       int newlen = 16;
+
+       if ( map->reallen )
+           newlen = 2*map->reallen;
+       grown=(SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newlen);
+       if ( !grown )
+           elog(ERROR, "No memory");
+       map->reallen=newlen;
+       map->list=grown;
+   }
+
+   slot = &( map->list[ map->len ] );
+   slot->key = strdup(key);
+   if ( ! slot->key )
+       elog(ERROR, "No memory");
+   slot->value=value;
+   map->len++;
+
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Same as addSNMap(), with the key given as a text value.  The key is
+ * converted with text2char(); the temporary C string is freed again
+ * because addSNMap() stores its own copy.
+ */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *k=text2char( key );
+   addSNMap(map, k, value);
+   pfree(k);
+}
+
+/*
+ * Look up key in the map.  Returns the stored value, or 0 when the map
+ * is empty or does not contain the key.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if ( !map->list || map->len==0 )
+       return 0;
+
+   probe.key = key;
+   probe.value = 0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( !hit )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Same as findSNMap(), with the key given as a text value.  Returns the
+ * stored value, or 0 when the key is not in the map.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* same type as findSNMap() returns; avoids a round trip through int */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release every key and the entry array of the map, then reset the map
+ * structure to all zeroes so that it can be reused.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *cursor;
+
+       for( cursor=map->list; map->len; cursor++, map->len-- ) {
+           if ( cursor->key )
+               free(cursor->key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One key/value pair of the map */
+typedef struct {
+   char    *key;   /* strdup'ed copy of the key, owned by the map */
+   Oid value;
+} SNMapEntry;
+
+/*
+ * Map from C string to Oid, kept as an array sorted by key.  An
+ * all-zero structure is an empty map (this is the state freeSNMap()
+ * leaves behind).
+ */
+typedef struct {
+   int len;            /* number of entries in use */
+   int reallen;        /* number of entries allocated */
+   SNMapEntry  *list;  /* entry array, allocated with realloc() */
+} SNMap;
+
+/* add a pair; the key is copied */
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+/* look a key up; returns 0 when it is not found */
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+/* free all entries and reset the map to empty */
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a zero-initialised stemmer environment.
+ *
+ * z->p is created with create_s().  When the respective size is
+ * non-zero, z->S gets S_size strings (each from create_s()), z->I gets
+ * I_size ints and z->B gets B_size symbols, all zeroed by calloc().
+ * Release the environment with SN_close_env().
+ *
+ * NOTE(review): none of the calloc() results is checked before use --
+ * confirm how out-of-memory is expected to be handled here.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        z->B_size = B_size;
+    }
+
+    return z;
+}
+
+/*
+ * Release an environment created by SN_create_env(): the strings in
+ * z->S (via lose_s()), the S, I and B arrays, the current string z->p
+ * and finally the environment itself.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    int i;
+
+    if (z->S_size)
+    {
+        for (i = 0; i < z->S_size; i++)
+            lose_s(z->S[i]);
+        free(z->S);
+    }
+    if (z->I_size)
+        free(z->I);
+    if (z->B_size)
+        free(z->B);
+    if (z->p)
+        lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Make the "size" symbols at s the current string of the environment:
+ * the range 0..z->l is replaced through replace_s() and z->c is reset
+ * to 0.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/*
+ * Stemmer environment.
+ * NOTE(review): the roles of a, lb, bra and ket are not visible in
+ * api.c; see the Snowball runtime (utilities.c) before relying on them.
+ */
+struct SN_env {
+    symbol * p;     /* current string, created by create_s() */
+    /* c is reset to 0 and l is the end of the replaced range in SN_set_current() */
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;     /* element counts of S, I and B */
+    symbol * * S;   /* S_size strings, each created by create_s() */
+    int * I;        /* I_size ints */
+    symbol * B;     /* B_size symbols */
+};
+
+/* allocate an environment; release it with SN_close_env() */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+/* replace the current string with the "size" symbols at s */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Forward declarations.  english_stem() is the public entry point; the
+   static r_* routines are the individual rules of the stemmer and are
+   only called from within this file. */
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+/* Environment constructor/destructor for this stemmer. */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/* a_0: searched forwards by r_mark_regions().  Single entry "gener";
+   its result (-1) is not inspected by the caller, only whether it matched.
+   Entry layout: { s_size, s, substring_i, result, function }. */
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+/* a_1: suffixes searched backwards by r_Step_1a().  The fourth field of
+   each entry is the case number r_Step_1a() switches on ("ss" and "us"
+   carry -1 and fall through the switch unchanged). */
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+/* a_2: endings examined by r_Step_1b() after it has deleted a suffix.
+   Result 1 ("at", "bl", "iz"): an 'e' is inserted.
+   Result 2 (doubled letters): the last symbol is deleted.
+   Result 3 (entry 0, the empty string): the short-word test is applied. */
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+/* a_3: suffixes searched backwards at the start of r_Step_1b().
+   Result 1 ("eed", "eedly") is replaced by "ee" when in R1;
+   result 2 ("ed", "ing", "edly", "ingly") is deleted when a vowel
+   precedes it. */
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+/* a_4: suffixes searched backwards by r_Step_2().  The fourth field of
+   each entry selects the replacement applied in r_Step_2() (cases 1-16). */
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+/* a_5: suffixes searched backwards by r_Step_3().  The fourth field of
+   each entry is the case number handled in r_Step_3() (1-6). */
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+/* a_6: suffixes searched backwards by r_Step_4().  Result 1 is deleted
+   outright; result 2 ("ion") is deleted only when preceded by 's' or 't'. */
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+/* a_7: final letters examined by r_Step_5(): 'e' (case 1), 'l' (case 2). */
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+/* a_8: whole words recognised by r_exception2().  When one of these is
+   the entire remaining word, english_stem() skips Step_1b .. Step_5. */
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+/* a_9: whole words recognised (searching forwards) by r_exception1().
+   Entries with result -1 are left unchanged; results 1-11 select the
+   fixed replacement applied in r_exception1(). */
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+/* Character groupings passed to the in_grouping / out_grouping helpers
+   together with a [min, max] character-code range.
+   NOTE(review): these look like bitmaps with bit 0 of byte 0 standing for
+   the `min` character of the call; under that reading g_v (min 97) is
+   a e i o u y, g_v_WXY (min 89) is Y a e i o u w x y, and g_valid_LI
+   (min 99) is c d e g h k m n r t.  Confirm against the helper
+   implementation, which is not in this file. */
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+/* Literal strings referenced by the rules below, numbered in order of
+   use: s_0..s_3 by r_prelude, s_4..s_6 by r_Step_1a, s_7..s_9 by
+   r_Step_1b, s_10..s_12 by r_Step_1c, s_13..s_28 by r_Step_2,
+   s_29..s_32 by r_Step_3, s_33..s_34 by r_Step_4, s_35 by r_Step_5,
+   s_36..s_46 by r_exception1 and s_47..s_48 by r_postlude.
+   They are not NUL-terminated; every use passes the length explicitly. */
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/* Prelude: mark every 'y' that is to be treated as a consonant by
+   rewriting it as 'Y', and record in B[0] (Y_found) whether any such
+   rewrite happened.
+
+   Two passes, each restoring the cursor afterwards:
+     1. at the cursor: 'y' followed by a member of g_v  ->  'Y';
+     2. repeatedly, anywhere further on: a member of g_v followed by
+        'y'  ->  that 'y' becomes 'Y'.
+   Always returns 1. */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                /* no match here: step one symbol forward, or give up at the limit */
+                z->c = c;
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1: /* emitted by the generator; nothing jumps here */
+        z->c = c;
+    }
+    return 1;
+}
+
+/* Compute the region marks p1 (I[0]) and p2 (I[1]).
+
+   Both start at the end of the word (z->l).  p1 is then set to the
+   position reached either by matching a_0 ("gener") at the cursor or,
+   failing that, by going past the first member of g_v and then past the
+   first non-member.  p2 is set by repeating the "past a member, past a
+   non-member" scan from p1.  If a scan hits the limit the remaining
+   marks keep their initial value.  The cursor is restored on exit and
+   the routine always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+/* Backward test for a "short" ending just before the cursor.  Succeeds
+   (returns 1) when, reading backwards, either
+     - a non-member of g_v_WXY, then a member of g_v, then a non-member
+       of g_v are found; or
+     - a non-member of g_v, then a member of g_v are found and the
+       cursor is then at the backward limit.
+   Returns 0 otherwise.  The cursor is left wherever the successful
+   alternative stopped; callers restore it themselves. */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m; /* rewind before trying the second alternative */
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* R1 test: 1 when the cursor is at or beyond mark p1 (I[0]), else 0. */
+static int r_R1(struct SN_env * z) {
+    return (z->c >= z->I[0]) ? 1 : 0;
+}
+
+/* R2 test: 1 when the cursor is at or beyond mark p2 (I[1]), else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->c >= z->I[1]) ? 1 : 0;
+}
+
+/* Step 1a: handle the suffixes in a_1, searching backwards.
+     case 1 ("sses")      : replace by "ss";
+     case 2 ("ied","ies") : replace by "ie" when exactly one symbol
+                            precedes the suffix, otherwise by "i";
+     case 3 ("s")         : delete, provided a member of g_v occurs
+                            somewhere before the symbol that precedes it.
+   "ss" and "us" (result -1) match but are left unchanged.
+   Returns 0 when nothing matched or a condition failed, 1 otherwise. */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+/* Step 1b: handle the suffixes in a_3, searching backwards.
+     case 1 ("eed","eedly"): replace by "ee" when in R1;
+     case 2 ("ed","ing","edly","ingly"): delete when a member of g_v
+       occurs before the suffix, then tidy the new ending using a_2:
+         result 1 ("at","bl","iz") : insert "e";
+         result 2 (doubled letter)  : delete the last symbol;
+         result 3 (anything else)   : insert "e" when the cursor is
+                                      exactly at p1 and r_shortv() holds.
+   Returns 0 when nothing matched or a condition failed, 1 otherwise. */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/* Step 1c: replace a final 'y' or 'Y' by 'i' when it is preceded by a
+   non-member of g_v and that preceding symbol is not the first one of
+   the word (the cursor must not be at the backward limit after it).
+   Returns 1 when the replacement was made, 0 otherwise. */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/* Step 2: find the longest suffix from a_4 (searching backwards) and,
+   when it lies in R1, rewrite it.  Results 1-15 map to a fixed
+   replacement string; result 13 additionally requires a preceding 'l';
+   result 16 deletes the suffix when it is preceded by a member of
+   g_valid_LI.  Returns 0 when nothing matched or a condition failed,
+   1 otherwise. */
+static int r_Step_2(struct SN_env * z) {
+    /* replacement text and its length, indexed by among result */
+    static symbol * const repl[17] = {
+        0,    s_13, s_14, s_15, s_16, s_17, s_18, s_19, s_20,
+        s_21, s_22, s_23, s_24, s_26, s_27, s_28, 0
+    };
+    static const int repl_len[17] = {
+        0, 4, 4, 4, 4, 3, 3, 3, 2,
+        3, 3, 3, 3, 2, 3, 4, 0
+    };
+    int which;
+
+    z->ket = z->c;
+    which = find_among_b(z, a_4, 24);
+    if (which == 0) return 0;
+    z->bra = z->c;
+    if (!r_R1(z)) return 0;
+
+    if (which == 16) {
+        if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+        slice_del(z);
+    } else if (which >= 1 && which <= 15) {
+        if (which == 13 && !(eq_s_b(z, 1, s_25))) return 0;
+        slice_from_s(z, repl_len[which], repl[which]);
+    }
+    return 1;
+}
+
+/* Step 3: find the longest suffix from a_5 (searching backwards) and,
+   when it lies in R1, rewrite it: results 1-4 are replaced by a fixed
+   string, result 5 is deleted, result 6 is deleted only when also in
+   R2.  Returns 0 when nothing matched or a condition failed, 1
+   otherwise. */
+static int r_Step_3(struct SN_env * z) {
+    int which;
+
+    z->ket = z->c;
+    which = find_among_b(z, a_5, 9);
+    if (which == 0) return 0;
+    z->bra = z->c;
+    if (!r_R1(z)) return 0;
+
+    if (which == 1) {
+        slice_from_s(z, 4, s_29);
+    } else if (which == 2) {
+        slice_from_s(z, 3, s_30);
+    } else if (which == 3) {
+        slice_from_s(z, 2, s_31);
+    } else if (which == 4) {
+        slice_from_s(z, 2, s_32);
+    } else if (which == 5) {
+        slice_del(z);
+    } else if (which == 6) {
+        if (!r_R2(z)) return 0;
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Step 4: find the longest suffix from a_6 (searching backwards) and,
+   when it lies in R2, delete it.  Result 2 ("ion") is deleted only when
+   the symbol before it is 's' or 't'.
+   Returns 0 when nothing matched or a condition failed, 1 otherwise. */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+/* Step 5: examine the final letter (a_7, searching backwards).
+     case 1 ('e'): delete when in R2, or when in R1 and r_shortv() does
+                   NOT hold for what precedes it;
+     case 2 ('l'): delete when in R2 and preceded by another 'l'.
+   Returns 0 when nothing matched or a condition failed, 1 otherwise. */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/* Second exception list: returns 1 when one of the words in a_8,
+   searched backwards from the cursor, reaches all the way to the
+   backward limit; returns 0 otherwise.  Sets ket and (on a match) bra. */
+static int r_exception2(struct SN_env * z) {
+    int matched;
+
+    z->ket = z->c;
+    matched = find_among_b(z, a_8, 8);
+    if (matched == 0) return 0;
+    z->bra = z->c;
+    /* the match must start at the backward limit */
+    return (z->c > z->lb) ? 0 : 1;
+}
+
+/* First exception list: returns 1 when one of the words in a_9,
+   searched forwards from the cursor, reaches the forward limit.
+   Results 1-11 replace the word with a fixed stem; entries whose
+   result is -1 are accepted unchanged.  Returns 0 when there is no
+   such match. */
+static int r_exception1(struct SN_env * z) {
+    /* replacement stem and its length for among results 1..11 */
+    static symbol * const stem[11] = {
+        s_36, s_37, s_38, s_39, s_40, s_41, s_42, s_43, s_44, s_45, s_46
+    };
+    static const int stem_len[11] = {
+        3, 3, 3, 3, 3, 3, 5, 4, 5, 4, 5
+    };
+    int which;
+
+    z->bra = z->c;
+    which = find_among(z, a_9, 18);
+    if (which == 0) return 0;
+    z->ket = z->c;
+    if (z->c < z->l) return 0;  /* the match must end at the limit */
+
+    if (which >= 1 && which <= 11) {
+        slice_from_s(z, stem_len[which - 1], stem[which - 1]);
+    }
+    return 1;
+}
+
+/* Postlude: undo the marking done by r_prelude().  When B[0] (Y_found)
+   is set, every 'Y' from the cursor onwards is rewritten as 'y'.
+   Returns 0 immediately when Y_found is clear, 1 otherwise. */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            /* no 'Y' here: step one symbol forward, or stop at the limit */
+            z->c = c;
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+/* Stem the word currently held in z (see SN_set_current()).
+
+   Order of work:
+     1. r_exception1(): whole-word exceptions; on success nothing else
+        runs and 1 is returned.
+     2. Otherwise the word must have at least 3 symbols from the cursor
+        ("hop 3"); if not, 0 is returned and the word is left unchanged.
+     3. r_prelude() and r_mark_regions() run forwards.
+     4. The cursor is moved to the end, lb is set to the old cursor, and
+        the suffix rules run backwards: Step_1a, then either
+        r_exception2() or Step_1b .. Step_5.  Each step's failure is
+        ignored and the cursor is restored after it.
+     5. The cursor returns to lb and r_postlude() runs forwards.
+   Returns 1 in every case except the short-word exit in (2). */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/* Create the environment for this stemmer: no string variables, two
+   integers (the marks kept in I[0] and I[1]) and one boolean (the flag
+   kept in B[0]). */
+extern struct SN_env * english_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 1);
+    return env;
+}
+
+/* Release an environment obtained from english_create_env(). */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create / release the environment used by english_stem(). */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/* Stem the word currently held in z, in place.  Returns 1, or 0 when
+   the word is shorter than three symbols and is not one of the
+   stemmer's whole-word exceptions. */
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+/* Integer extremes, taken from <limits.h>. */
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Size in bytes of the two-int header that precedes every symbol
+   string.  Parenthesized so it expands safely inside any expression. */
+#define HEAD (2*sizeof(int))
+
+/* Accessors for that header, given a pointer p to the first symbol:
+   the int just before p is the string's size, the one before that its
+   capacity.  SIZE and CAPACITY may be used as lvalues. */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    ((int *)(p))[-2]
+
+/* One entry of a search table used by find_among() / find_among_b().
+   The generated stemmers initialise entries positionally as
+   { s_size, s, substring_i, result, function }. */
+struct among
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    int (* function)(struct SN_env *); /* 0 in the tables seen here; presumably an optional extra test - TODO confirm */
+};
+
+/* Runtime support routines used by the generated stemmers.  They are
+   only declared here; the notes below are inferred from the names and
+   from how the generated code calls them and should be confirmed
+   against the implementation. */
+
+/* Allocate / release a symbol string. */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+/* Character-class tests against a grouping `s` covering the character
+   codes min..max.  The _b variants are the ones used while scanning
+   backwards. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+/* Character-range tests (not used by the code visible in this chunk). */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+/* Literal-string comparison at the cursor; s_size is the length of s. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+/* Table lookup; v has v_size entries.  Callers treat a zero return as
+   "no match" and otherwise switch on the returned value. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+/* String editing.  The slice_* routines operate on the region marked
+   by z->bra and z->ket. */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point; stems the word held in z in place. */
+extern int russian_stem(struct SN_env * z);
+/* Internal routines, one per step of the stemmer.  Each returns non-zero
+   when it succeeded and 0 when it did not apply. */
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Environment construction / destruction for this stemmer. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Endings looked up by r_perfective_gerund via find_among_b(z, a_0, 9).
+   Symbols are raw byte codes (presumably KOI8-R Cyrillic -- confirm).
+   Each table entry is { length, symbols, substring_i, result, function }.
+   Entries with result 1 are only deleted when preceded by symbol 193 or 209;
+   entries with result 2 are deleted unconditionally.
+   find_among_b binary-searches the table, so the entry order must be kept. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Endings looked up by r_adjective via find_among_b(z, a_1, 26).  Every
+   entry has result 1, i.e. a match is simply deleted.  No entry names a
+   shorter entry (substring_i is -1 throughout).  The table is
+   binary-searched, so the entry order must be kept. */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Endings looked up by the optional second step of r_adjectival via
+   find_among_b(z, a_2, 8).  Result 1 entries are only deleted when preceded
+   by symbol 193 or 209; result 2 entries are deleted unconditionally.
+   The table is binary-searched, so the entry order must be kept. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* The two endings looked up by r_reflexive via find_among_b(z, a_3, 2);
+   a match (result 1) is deleted. */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Endings looked up by r_verb via find_among_b(z, a_4, 46).  Result 1
+   entries are only deleted when preceded by symbol 193 or 209; result 2
+   entries are deleted unconditionally.  A non-negative substring_i names the
+   shorter entry to fall back to.  The table is binary-searched, so the entry
+   order must be kept. */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Endings looked up by r_noun via find_among_b(z, a_5, 36).  Every entry
+   has result 1, i.e. a match is simply deleted.  A non-negative substring_i
+   names the shorter entry to fall back to.  The table is binary-searched, so
+   the entry order must be kept. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* The two endings looked up by r_derivational via find_among_b(z, a_6, 2);
+   a match is deleted only when it also passes the r_R2 test. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Endings looked up by r_tidy_up via find_among_b(z, a_7, 4).  The result
+   value selects the clean-up action there:
+     1 - delete the ending, then drop one 206 of a following "206 206" pair;
+     2 - the matched 206 is deleted when another 206 precedes it;
+     3 - delete the ending.
+   The table is binary-searched, so the entry order must be kept. */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Bitmap of the character class tested in r_mark_regions: bit (ch - 192) is
+   set for every member, and the class is queried with min 192 / max 220. */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* One-symbol literals handed to eq_s_b by the routines below.  The generator
+   emits one array per use site, hence the repeated values. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the two region marks used by the stemmer:
+     z->I[0] (pV) - position just after the first symbol that is in g_v;
+     z->I[1] (p2) - position reached after then skipping past a symbol not in
+                    g_v, another symbol in g_v and another symbol not in g_v.
+   A mark that cannot be reached keeps its default, the end of the word.
+   The cursor is left where it was on entry.  Always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    const int entry_c = z->c;
+
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+
+    /* in_grouping / out_grouping advance the cursor themselves on success,
+       so each loop stops just after the symbol it was looking for. */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto finish;
+        z->c++;
+    }
+    z->I[0] = z->c;
+
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto finish;
+        z->c++;
+    }
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto finish;
+        z->c++;
+    }
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto finish;
+        z->c++;
+    }
+    z->I[1] = z->c;
+
+finish:
+    z->c = entry_c;
+    return 1;
+}
+
+/* Returns 1 when the cursor is at or beyond the p2 mark (z->I[1]), else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/* Remove an ending listed in a_0 that ends at the cursor.
+   Endings with result 1 are removed only when the symbol in front of them is
+   s_0 or s_1; endings with result 2 are removed unconditionally.
+   Returns 1 when an ending was removed, 0 otherwise. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int which;
+
+    z->ket = z->c;
+    which = find_among_b(z, a_0, 9);
+    if (which == 0) return 0;
+    z->bra = z->c;
+
+    if (which == 1) {
+        int mark = z->l - z->c;
+        if (!eq_s_b(z, 1, s_0)) {
+            /* first alternative failed: rewind and try the second one */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z);
+    } else if (which == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Remove an ending listed in a_1 that ends at the cursor.
+   Returns 1 when one was found, 0 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int which;
+
+    z->ket = z->c;
+    which = find_among_b(z, a_1, 26);
+    if (which == 0) return 0;
+    z->bra = z->c;
+
+    if (which == 1) slice_del(z);
+    return 1;
+}
+
+/* Remove an adjective ending (r_adjective) and then, optionally, a further
+   ending from a_2.  In a_2, result 1 endings are removed only when preceded
+   by s_2 or s_3, while result 2 endings are removed unconditionally.
+   Returns 0 when r_adjective fails; otherwise 1, whether or not the optional
+   second step applied.
+
+   The two cursor marks have distinct names.  Previously both were called
+   "m", so the failure path of the inner alternative restored the cursor from
+   the inner mark instead of the mark taken at the start of the optional
+   step, leaving the cursor in front of the unmatched ending. */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m_try = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m_try; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m_try; goto lab0; }
+            case 1:
+                {   int m_or = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m_or;
+                    /* both alternatives failed: abandon the optional step and
+                       rewind to where it started */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_try; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Remove an ending listed in a_3 that ends at the cursor.
+   Returns 1 when one was found, 0 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int which;
+
+    z->ket = z->c;
+    which = find_among_b(z, a_3, 2);
+    if (which == 0) return 0;
+    z->bra = z->c;
+
+    if (which == 1) slice_del(z);
+    return 1;
+}
+
+/* Remove an ending listed in a_4 that ends at the cursor.
+   Endings with result 1 are removed only when the symbol in front of them is
+   s_4 or s_5; endings with result 2 are removed unconditionally.
+   Returns 1 when an ending was removed, 0 otherwise. */
+static int r_verb(struct SN_env * z) {
+    int which;
+
+    z->ket = z->c;
+    which = find_among_b(z, a_4, 46);
+    if (which == 0) return 0;
+    z->bra = z->c;
+
+    if (which == 1) {
+        int mark = z->l - z->c;
+        if (!eq_s_b(z, 1, s_4)) {
+            /* first alternative failed: rewind and try the second one */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z);
+    } else if (which == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Remove an ending listed in a_5 that ends at the cursor.
+   Returns 1 when one was found, 0 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int which;
+
+    z->ket = z->c;
+    which = find_among_b(z, a_5, 36);
+    if (which == 0) return 0;
+    z->bra = z->c;
+
+    if (which == 1) slice_del(z);
+    return 1;
+}
+
+/* Remove an ending listed in a_6 that ends at the cursor, provided the
+   ending starts at or after the p2 mark (r_R2).
+   Returns 1 when an ending was removed, 0 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int which;
+
+    z->ket = z->c;
+    which = find_among_b(z, a_6, 2);
+    if (which == 0) return 0;
+    z->bra = z->c;
+
+    /* the cursor now sits at the start of the matched ending */
+    if (!r_R2(z)) return 0;
+
+    if (which == 1) slice_del(z);
+    return 1;
+}
+
+/* Final clean-up driven by the ending found in a_7:
+     result 1 - delete the ending; then, if two s_6/s_7 symbols precede it,
+                delete the last of the two;
+     result 2 - the matched symbol is deleted when s_8 precedes it;
+     result 3 - delete the ending.
+   Returns 0 when nothing in a_7 matches or a required preceding symbol is
+   missing, 1 otherwise. */
+static int r_tidy_up(struct SN_env * z) {
+    int which;
+
+    z->ket = z->c;
+    which = find_among_b(z, a_7, 4);
+    if (which == 0) return 0;
+    z->bra = z->c;
+
+    if (which == 1) {
+        slice_del(z);
+        z->ket = z->c;
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c;
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z);
+    } else if (which == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z);
+    } else if (which == 3) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Stem the word held in z in place.
+
+   Steps, all working backwards from the end of the word and confined to the
+   part after the pV mark (z->I[0]):
+     1. remove a perfective gerund ending; failing that, optionally remove a
+        reflexive ending and then an adjectival, verb or noun ending;
+     2. optionally remove a trailing s_9 symbol;
+     3. attempt the derivational ending removal;
+     4. attempt the tidy-up step.
+   Steps 3 and 4 may fail without affecting the outcome.
+
+   Returns 0 if the cursor lies before the pV mark when the backward limit is
+   set up, otherwise 1. */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    /* switch to backward mode: the old cursor becomes the backward limit and
+       the cursor jumps to the end of the word */
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        /* m3 keeps the previous backward limit; the limit is raised to pV
+           until it is restored at the end of this block */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                /* first of adjectival / verb / noun that succeeds wins */
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            /* success or failure, the cursor goes back to the end of the word */
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3;
+    }
+    /* leave backward mode: cursor returns to the restored backward limit */
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create the environment used by russian_stem.  The arguments 0, 2, 0 are
+   passed straight to SN_create_env; the 2 presumably sizes the I[] array
+   (this stemmer uses I[0] and I[1]) -- confirm against SN_create_env. */
+extern struct SN_env * russian_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 0);
+    return env;
+}
+
+/* Dispose of an environment obtained from russian_create_env. */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create / destroy the environment that russian_stem operates on. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Stem the word held in z in place. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* "unless (C) stmt" runs stmt when C is false. */
+#define unless(C) if(!(C))
+
+/* Capacity, in symbols, of a buffer freshly made by create_s. */
+#define CREATE_SIZE 1
+
+/* Allocate a new symbol buffer with capacity and size CREATE_SIZE.
+   The returned pointer addresses the symbols; a header of HEAD bytes sits in
+   front of it, which is why lose_s subtracts HEAD again before freeing.
+
+   Allocation failure is fatal: a message is written to stderr and the process
+   exits, the same way slice_check reports a faulty slice.  Previously the
+   header was written through an invalid pointer when malloc returned NULL. */
+extern symbol * create_s(void)
+{   symbol * p;
+    char * mem = (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL)
+    {
+        fprintf(stderr, "out of memory in create_s\n");
+        exit(1);
+    }
+    p = (symbol *) (HEAD + mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Release a symbol buffer.  p addresses the symbols, so the allocation
+   itself starts HEAD bytes earlier.  A null p is ignored; without the guard
+   an address computed from a null pointer would be handed to free. */
+extern void lose_s(symbol * p)
+{   if (p == 0) return;
+    free((char *) p - HEAD);
+}
+
+/* Succeeds when the symbol at the cursor lies in min..max and its bit is set
+   in the bitmap s (bit index = symbol - min).  On success the cursor moves
+   forward by one and 1 is returned; otherwise 0, cursor unchanged. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch, bit;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max || ch < min) return 0;
+    bit = ch - min;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward form of in_grouping: tests the symbol just before the cursor and
+   moves the cursor back by one on success.  Fails at the backward limit. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch, bit;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max || ch < min) return 0;
+    bit = ch - min;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Succeeds when the symbol at the cursor is NOT a member of the grouping,
+   i.e. it is outside min..max or its bit in s is clear.  On success the
+   cursor moves forward by one and 1 is returned; otherwise 0. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (min <= ch && ch <= max)
+    {   int bit = ch - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward form of out_grouping: tests the symbol just before the cursor and
+   moves the cursor back by one on success.  Fails at the backward limit. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (min <= ch && ch <= max)
+    {   int bit = ch - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Succeeds when the symbol at the cursor lies in min..max; the cursor then
+   moves forward by one.  Returns 1 on success, 0 otherwise. */
+extern int in_range(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (!(min <= ch && ch <= max)) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward form of in_range: tests the symbol just before the cursor and
+   moves the cursor back by one on success. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (!(min <= ch && ch <= max)) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Succeeds when the symbol at the cursor lies outside min..max; the cursor
+   then moves forward by one.  Returns 1 on success, 0 otherwise. */
+extern int out_range(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (min <= ch && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward form of out_range: tests the symbol just before the cursor and
+   moves the cursor back by one on success. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (min <= ch && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Succeeds when the s_size symbols at s match the text starting at the
+   cursor; the cursor then moves past them.  Returns 1 on success, 0 when too
+   few symbols remain before the limit or the text differs. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{   if (z->l - z->c < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward form of eq_s: succeeds when the s_size symbols at s match the
+   text ending at the cursor; the cursor then moves to the start of the
+   match.  Returns 1 on success, 0 otherwise. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{   if (z->c - z->lb < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* eq_s for a symbol buffer: the length is taken from the buffer's stored
+   size. */
+extern int eq_v(struct SN_env * z, symbol * p)
+{   int n = SIZE(p);
+    return eq_s(z, n, p);
+}
+
+/* eq_s_b for a symbol buffer: the length is taken from the buffer's stored
+   size. */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{   int n = SIZE(p);
+    return eq_s_b(z, n, p);
+}
+
+/* Look up the text starting at the cursor in the table v of v_size entries.
+
+   Phase 1 is a binary search over v: each probe compares the text with
+   v[k].s symbol by symbol and narrows the interval [i, j).  common_i and
+   common_j record how many leading symbols are already known to match at
+   the two ends, so each comparison resumes from the smaller of the two.
+   Phase 2 starts at v[i] and follows the substring_i links until an entry is
+   found whose whole string matched.
+
+   On a match the cursor is placed just after the matched string and the
+   entry's result is returned; if the entry has a function, that function
+   must also return non-zero, otherwise the search moves on to substring_i.
+   Returns 0 when no entry matches. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        /* this inner i shadows the interval bound and is only a loop index */
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; } /* text ran out first */
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size) /* the whole of w->s matched */
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c + w->s_size; /* undo any cursor movement by the function */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/* Backward counterpart of find_among: looks up the text ENDING at the cursor.
+   Entry strings are compared from their last symbol towards their first, and
+   the text is read from the cursor backwards down to the backward limit
+   z->lb.  On a match the cursor is placed at the start of the matched string
+   and the entry's result is returned; 0 means no entry matched. */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        /* this inner i shadows the interval bound and is only a loop index */
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; } /* text ran out first */
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            /* one extra round so that v[0] itself gets compared */
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size) /* the whole of w->s matched */
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c - w->s_size; /* undo any cursor movement by the function */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Move the symbol buffer p into a larger allocation able to hold at least n
+   symbols (20 spare symbols are added) and release the old buffer.
+   Returns the new buffer; p must not be used afterwards.
+
+   Allocation failure is fatal: a message is written to stderr and the process
+   exits, the same way slice_check reports a faulty slice.  Previously the
+   header was written through an invalid pointer when malloc returned NULL.
+   The stored size is now carried over as well, so the new buffer's header is
+   fully initialised even before the caller sets a new size. */
+extern symbol * increase_size(symbol * p, int n)
+{   int new_size = n + 20;
+    symbol * q;
+    char * mem = (char *) malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    if (mem == NULL)
+    {
+        fprintf(stderr, "out of memory in increase_size\n");
+        exit(1);
+    }
+    q = (symbol *) (HEAD + mem);
+    CAPACITY(q) = new_size;
+    SET_SIZE(q, SIZE(p));
+    memmove(q, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return q;
+}
+
+/* to replace symbols between c_bra and c_ket in z->p by the
+   s_size symbols at s
+*/
+
+/* Replace the symbols of z->p between c_bra and c_ket with the s_size symbols
+   at s, growing the buffer when needed.  The limit z->l is shifted by the
+   length change; a cursor at or after c_ket is shifted with it, and a cursor
+   strictly inside the replaced span is moved to c_bra.
+   Returns the length change (new length minus old length of the span). */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{   const int delta = s_size - (c_ket - c_bra);
+    const int old_len = SIZE(z->p);
+
+    if (delta != 0)
+    {   const int new_len = delta + old_len;
+        if (new_len > CAPACITY(z->p))
+            z->p = increase_size(z->p, new_len);
+        /* slide the tail that follows the span to its new position */
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        if (z->c >= c_ket)
+            z->c += delta;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0)
+        memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/* Verify 0 <= bra <= ket <= l <= SIZE(p) before a slice operation.  When
+   the check fails, a message and a dump of the buffer are printed and the
+   process exits with status 1. */
+static void slice_check(struct SN_env * z)
+{
+    int in_order = 0 <= z->bra
+                && z->bra <= z->ket
+                && z->ket <= z->l
+                && z->l <= SIZE(z->p);
+    if (in_order) return;
+
+    fprintf(stderr, "faulty slice operation:\n");
+    debug(z, -1, 0);
+    exit(1);
+}
+
+/* Replace the current slice (z->bra .. z->ket) with the s_size symbols at s,
+   after checking that the slice bounds are sane. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{   int from, to;
+    slice_check(z);
+    from = z->bra;
+    to = z->ket;
+    (void) replace_s(z, from, to, s_size, s);
+}
+
+/* Replace the current slice with the contents of the symbol buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{   int n = SIZE(p);
+    slice_from_s(z, n, p);
+}
+
+/* Delete the current slice, i.e. replace it with nothing. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, (symbol *) 0);
+}
+
+/* Replace the symbols between bra and ket with the s_size symbols at s, and
+   shift the slice marks z->bra / z->ket by the length change when they lie
+   at or after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{   const int shift = replace_s(z, bra, ket, s_size, s);
+    if (z->bra >= bra) z->bra += shift;
+    if (z->ket >= bra) z->ket += shift;
+}
+
+/* insert_s for a symbol buffer: the length is taken from the buffer's stored
+   size. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice (z->bra .. z->ket) into the buffer p, growing p
+   when its capacity is too small.  Returns p, which may have moved. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{   int count;
+    slice_check(z);
+    count = z->ket - z->bra;
+    if (count > CAPACITY(p)) p = increase_size(p, count);
+    memmove(p, z->p + z->bra, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into the buffer p, growing p when its
+   capacity is too small.  Returns p, which may have moved. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{   const int count = z->l;
+    if (count > CAPACITY(p))
+        p = increase_size(p, count);
+    memmove(p, z->p, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/* Print the buffer on stdout between single quotes, with markers in front of
+   the positions they refer to: '{' backward limit, '[' bra, '|' cursor,
+   ']' ket, '}' limit.  Zero symbols are shown as '#'.  When number is not
+   negative, a "number (line line_count): [size]" prefix is printed first. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{   int pos;
+    const int size = SIZE(z->p);
+
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,size);
+
+    /* pos == size is visited too, so that markers sitting at the very end of
+       the buffer are still shown */
+    for (pos = 0; pos <= size; pos++)
+    {   if (pos == z->lb) printf("{");
+        if (pos == z->bra) printf("[");
+        if (pos == z->c) printf("|");
+        if (pos == z->ket) printf("]");
+        if (pos == z->l) printf("}");
+        if (pos == size) continue;
+        {   int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+/* size of the line buffer readstoplist() hands to fgets() */
+#define STOPBUFLEN 4096
+
+/*
+ * Convert the NUL-terminated string str to lower case in place, one byte
+ * at a time through tolower(), and return str.
+ */
+char*
+lowerstr(char *str) {
+   unsigned char *cur;
+
+   for (cur = (unsigned char*)str; *cur != '\0'; cur++)
+       *cur = tolower(*cur);
+   return str;
+}
+
+/*
+ * Release a stop list: free each of the s->len strings in s->stop, free the
+ * array itself, and zero the whole StopList structure.
+ *
+ * The array is freed exactly once, after all entries; an allocated but
+ * empty array is freed too.  Entries are visited by index so nothing past
+ * s->len is ever read (the array is not NULL-terminated).
+ */
+void
+freestoplist(StopList *s) {
+   if ( s->stop ) {
+       int i;
+
+       for(i=0; i<s->len; i++)
+           if ( s->stop[i] )
+               free(s->stop[i]);
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Load a stop-word list from the file named by the text value in.
+ *
+ * Each non-empty line becomes one strdup()'ed entry; if s->wordop is set it
+ * is applied to every entry.  On return s->stop points to the malloc'ed
+ * array (NULL when in is NULL or empty) and s->len is the entry count.
+ *
+ * Raises ERROR when the file cannot be opened or memory runs out.  On the
+ * out-of-memory path everything allocated here is released and *s is
+ * zeroed before the error is raised.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+       bool    nomem=false;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t  buflen=strlen(buf);
+
+           /*
+            * Drop the newline only when there is one: the last line of a
+            * file may lack it, and an empty read must not touch buf[-1].
+            */
+           if ( buflen > 0 && buf[buflen-1] == '\n' )
+               buf[--buflen] = '\0';
+           if ( buflen == 0 ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   nomem=true;
+                   break;
+               }
+               stop=tmp;
+           }
+
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               nomem=true;
+               break;
+           }
+           if ( s->wordop )
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++;
+       }
+       fclose(hin);
+
+       if ( nomem ) {
+           /* the entries live in the local array, not yet in s->stop */
+           while( s->len > 0 )
+               free(stop[--(s->len)]);
+           if ( stop )
+               free(stop);
+           memset(s,0,sizeof(StopList));
+           elog(ERROR,"Not enough memory");
+       }
+       pfree(filename);
+   }
+   s->stop=stop;
+}
+
+/* qsort()/bsearch() comparator: a and b each point to a char* element;
+ * the strings are ordered with strcmp(). */
+static int
+comparestr(const void *a, const void *b) {
+   const char *left = *(char * const *)a;
+   const char *right = *(char * const *)b;
+
+   return strcmp(left, right);
+}
+
+/* Sort the entries of a non-empty stop list with comparestr(), which is the
+ * order searchstoplist() relies on for bsearch(). */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is an entry of the stop list.  If s->wordop is set
+ * it is applied to key first and its result is what gets looked up.  The
+ * lookup is a bsearch() with comparestr().
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop )
+       key=(*(s->wordop))(key);
+
+   if ( !s->stop || s->len<=0 )
+       return false;
+
+   return ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) != NULL ) ? true : false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+/* Saved SPI plans, prepared on first use and reused afterwards. */
+static void *plan_getcfg_bylocale=NULL;    /* get_currcfg(): cfg oid by locale */
+static void *plan_getcfg=NULL;             /* init_cfg(): parser name by cfg oid */
+static void *plan_getmap=NULL;             /* init_cfg(): token id -> dictionary names */
+static void *plan_name2id=NULL;            /* name2id_cfg(): cfg oid by name */
+/* Current configuration oid; 0 means "not chosen yet" (see get_currcfg()). */
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the tsearch configuration whose pg_ts_cfg oid is id.
+ *
+ * Step 1: fetch the parser name of the configuration.
+ * Step 2: fetch (token id, dictionary-name array) rows, highest token id
+ *         first, and build cfg->map, an array indexed by token id.  While
+ *         SPI is connected the dict_id slots temporarily hold copies of the
+ *         dictionary names.
+ * Step 3: after SPI_finish(), resolve the parser name and every dictionary
+ *         name to ids with name2id_prs() / name2id_dict().
+ *
+ * cfg->map and the dict_id arrays are malloc()'ed; reset_cfg() frees them.
+ * Errors are reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer(
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
+       );
+       /* the copy must outlive SPI_finish(), hence TopMemoryContext */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+
+       cfg->id=id;
+   } else
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /* rows arrive by descending tokid, so the first row sizes the map */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull now refers to the dictionary array (column 2) */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) );
+
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       /* allocate first, publish the length only once the array exists */
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*ARRNELEMS(a) );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       cfg->map[lexid].len=ARRNELEMS(a);
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       }
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a )
+           pfree(a);
+   }
+
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace the temporary name copies by the dictionary ids */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/* Process-local cache of loaded configurations. */
+typedef struct {
+   /* entry returned by the latest findcfg() call, checked first next time */
+   TSCfgInfo   *last_cfg;
+   /* number of entries in use in list */
+   int     len;
+   /* number of entries allocated for list */
+   int     reallen;
+   /* realloc()'ed array, kept sorted by id with comparecfg() */
+   TSCfgInfo   *list;
+   /* name -> oid cache used by name2id_cfg() */
+   SNMap       name2id_map;
+} CFGList;
+
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name->id map, every cached
+ * configuration's dictionary map and the list itself, then zero CList.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               /* map[] has CList.list[i].len slots, one per token id */
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ * The ids are compared explicitly: Oid is unsigned, so returning their
+ * difference converted to int would give the wrong sign for ids that are
+ * far apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached TSCfgInfo for configuration id, loading it with
+ * init_cfg() on first use.
+ *
+ * Lookup order: the entry returned last time, then a bsearch() of the
+ * sorted list, and finally a fresh load.  A new configuration is built in a
+ * local variable and copied into the list only after init_cfg() returned,
+ * so an error raised while loading can never leave a half-initialized
+ * entry reachable through CList.last_cfg or the list.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo newcfg;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance: load it; nothing is published until this succeeds */
+   CList.last_cfg=NULL;
+   init_cfg(id, &newcfg);
+
+   /* grow the list only now, from whatever state the cache is in */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp )
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+   CList.list[CList.len]=newcfg;
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return findcfg(id); /* qsort changed order!! */
+}
+
+
+/*
+ * Translate a configuration name into its pg_ts_cfg oid.
+ * The name->id map in CList is consulted first; on a miss the oid is read
+ * through SPI and added to the map.  Raises ERROR if no such configuration
+ * exists or its oid is null.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid     argtypes[1]={ TEXTOID };
+   Datum   values[1]={ PointerGetDatum(name) };
+   bool    isnull;
+   int     rc;
+   Oid     cfgid=findSNMap_t( &(CList.name2id_map), name );
+
+   /* cache hit */
+   if ( cfgid != 0 )
+       return cfgid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed > 0 ) {
+       cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull )
+           elog(ERROR, "Null id for tsearch config");
+   } else {
+       elog(ERROR, "No tsearch config");
+   }
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Parse buf (buflen bytes) with the parser of configuration cfg and append
+ * the normalized lexemes to prs->words.
+ *
+ * For every token the dictionaries mapped to its type are tried in order;
+ * the first one that returns a non-NULL result ends the search.  Each such
+ * token advances prs->pos, and every lexeme it produced is stored with that
+ * position (limited by LIMITPOS).  Tokens whose type has no map entry are
+ * skipped.  Raises ERROR for a token of MAXSTRLEN bytes or more.
+ */
+void
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   PointerGetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           /* norms is a NULL-terminated array; the strings are kept in prs->words */
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token to prs->words: a zeroed HLWORD carrying the token type,
+ * its length and a palloc()'ed copy of the buflen bytes at buf.  The array
+ * is doubled as often as needed to make room.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;
+}
+
+/*
+ * Link the most recently added word (prs->words[curwords-1]) to every VAL
+ * item of query whose operand equals the buflen bytes at buf.
+ *
+ * The first match is stored in the word itself; each further match gets a
+ * copy of the word appended to prs->words with repeated=1.  Room for up to
+ * query->size extra entries is reserved up front.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* take the address only now: repalloc() above may have moved the array */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) {
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else
+               word->item=item;
+       }
+       item++;
+   }
+}
+
+/*
+ * Parse buf (buflen bytes) for headline generation.
+ *
+ * Every token is appended verbatim to prs->words with hladdword().  Tokens
+ * whose type is mapped to dictionaries are additionally normalized by the
+ * first dictionary that returns a non-NULL result, and each resulting
+ * lexeme is matched against query with hlfinditem().  The lexeme strings
+ * and the result array are freed here.  Raises ERROR for a token of
+ * MAXSTRLEN bytes or more.
+ */
+void
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len )
+           continue;
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   PointerGetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           /* norms is a NULL-terminated array of lexemes */
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from prs->words.
+ *
+ * Only words flagged "in" and neither "skip" nor "repeated" are emitted:
+ * a "replace" word becomes a single blank, any other word is copied and,
+ * when "selected", wrapped in prs->startsel / prs->stopsel.  The word
+ * buffers of all non-repeated entries are pfree()'d along the way.
+ * Returns a palloc()'ed text value.
+ */
+text*
+genhl(HLPRSTEXT * prs) {
+   int     buflen=128;
+   text    *out=(text*)palloc( buflen );
+   char    *cur=((char*)out) + VARHDRSZ;
+   HLWORD  *wrd;
+   HLWORD  *end=prs->words + prs->curwords;
+
+   for( wrd=prs->words; wrd < end; wrd++ ) {
+       /* double the buffer until the word and both markers are sure to fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (cur - ((char*)out)) >= buflen ) {
+           int used = cur - ((char*)out);
+
+           buflen*= 2;
+           out = (text *) repalloc(out, buflen);
+           cur=((char*)out) + used;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace )
+               *cur++ = ' ';
+           else {
+               if (wrd->selected) {
+                   memcpy(cur,prs->startsel,prs->startsellen);
+                   cur+=prs->startsellen;
+               }
+               memcpy(cur,wrd->word,wrd->len);
+               cur+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(cur,prs->stopsel,prs->stopsellen);
+                   cur+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries carry a copy of another entry's word pointer */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+   }
+
+   VARATT_SIZEP(out)=cur - ((char*)out);
+   return out;
+}
+
+/*
+ * Return the oid of the current configuration.
+ * If none has been chosen yet, the configuration whose pg_ts_cfg.locale
+ * equals the current LC_CTYPE locale is looked up through SPI and
+ * remembered in current_cfg_id; ERROR is raised when there is none.
+ */
+int
+get_currcfg(void) {
+   Oid     argtypes[1]={ TEXTOID };
+   Datum   values[1];
+   const char *locname;
+   bool    isnull;
+   int     rc;
+
+   /* already known */
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( plan_getcfg_bylocale == NULL ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( plan_getcfg_bylocale == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* a NULL second argument only queries the locale */
+   locname = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)locname) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed > 0 ) {
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   } else {
+       elog(ERROR,"Can't find tsearch config by locale");
+   }
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current one.
+ * findcfg() is called first, so a configuration that cannot be loaded
+ * errors out before current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current
+ * one.  The name is resolved with name2id_cfg() and handed to set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text    *cfgname=PG_GETARG_TEXT_P(0);
+   Datum   cfgid=ObjectIdGetDatum( name2id_cfg(cfgname) );
+
+   DirectFunctionCall1( set_curcfg, cfgid );
+   PG_FREE_IF_COPY(cfgname, 0);
+   PG_RETURN_VOID();
+}
+
+/* SQL-callable: return the oid of the current configuration, as
+ * determined by get_currcfg(). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   PG_RETURN_OID( get_currcfg() ); 
+}
+
+/*
+ * SQL-callable: report that the tsearch caches were cleaned.
+ * NOTE(review): nothing is reset in this body itself; presumably
+ * ts_error() flushes the caches before it reports - confirm in common.c.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+/* Declarations for tsearch configurations and the word lists that the
+ * parsing functions in ts_cfg.c fill. */
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries mapped to one token type. */
+typedef struct {
+   /* number of entries in dict_id */
+   int len;
+   /* dictionary oids stored as Datums, in the order they are tried */
+   Datum   *dict_id;
+} ListDictionary;
+
+/* One loaded configuration, as built by init_cfg(). */
+typedef struct {
+   /* oid of the configuration */
+   Oid id;
+   /* id of its parser, as returned by name2id_prs() */
+   Oid prs_id;
+   /* number of entries in map (highest mapped token id + 1) */
+   int len;
+   /* indexed by token id */
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalized lexeme produced by parsetext_v2(). */
+typedef struct {
+        uint16          len;
+   /* parsetext_v2() stores a single position in pos.pos */
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        char       *word;
+   /* set to 0 by parsetext_v2() */
+   uint32  alen;
+}       WORD;
+   
+/* Growing array of WORDs plus the running token position. */
+typedef struct {
+        WORD       *words;
+        /* allocated entries */
+        int4            lenwords;
+        /* used entries */
+        int4            curwords;
+   int4        pos;
+}       PRSTEXT;
+
+/* One token of a text being prepared for headline output. */
+typedef struct {
+        uint16    len;
+   /* flags read by genhl(); repeated marks the extra copies that
+    * hlfinditem() appends for further query matches */
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   /* token type reported by the parser */
+   uint8   type;
+        char      *word;
+   /* matching query item, if any */
+   ITEM      *item;
+}       HLWORD;
+   
+/* Growing array of HLWORDs plus the markers genhl() puts around selected
+ * words. */
+typedef struct {
+        HLWORD       *words;
+        int4            lenwords;
+        int4            curwords;
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+/*
+ * Input function for tsstat.  The argument is never examined: every call
+ * returns a freshly palloc'd block that consists of the header only
+ * (len = STATHDRSIZE, size = 0).
+ */
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+/*
+ * Output function for tsstat: not implemented.  Every call reports
+ * "Unimplemented" through elog(ERROR); the PG_RETURN_NULL() that follows
+ * only gives the function a formal return statement.
+ */
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * Create or enlarge an array of WordEntry pointers.
+ *
+ * in   - current array, or NULL when nothing has been allocated yet
+ * len  - in/out: capacity in elements; set to 8 on first allocation,
+ *        doubled on every later call
+ *
+ * Returns the (possibly moved) array.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       /* existing array: double its capacity */
+       *len = (*len) * 2;
+       return repalloc( in, sizeof(WordEntry*) * (*len) );
+   }
+
+   /* first allocation */
+   *len = 8;
+   return palloc( sizeof(WordEntry*) * (*len) );
+}
+
+/*
+ * Compare a statistics entry with a tsvector word.
+ *
+ * Words are ordered by length first; only words of equal length are
+ * compared by content with strncmp().  The entry's text lives in stat's
+ * string area, the word's text in txt's string area.
+ *
+ * Returns <0, 0 or >0 in the usual comparator fashion.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   const char *stattext;
+   const char *wordtext;
+
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   stattext = STATSTRPTR(stat) + a->pos;
+   wordtext = STRPTR(txt) + b->pos;
+   return strncmp( stattext, wordtext, a->len );
+}
+
+/*
+ * Build a new statistics block that contains every entry of stat plus the
+ * len words of txt referenced by entry[].
+ *
+ * stat   - existing statistics block (not modified, not freed)
+ * txt    - tsvector the new words belong to
+ * entry  - pointers to the words of txt that are not yet present in stat
+ * len    - number of elements in entry[]
+ *
+ * The strings of stat are copied first and the new strings are appended
+ * behind them.  The StatEntry array is produced either by a binary search
+ * for the single insertion point (len == 1) or by merging the two lists.
+ * The merge only yields a sorted array when stat and entry[] are both
+ * already ordered the way compareStatWord() orders them.
+ *
+ * Returns the newly palloc'd block.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* total length of the strings that will be added */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings keep their offsets; new ones are appended at curptr */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary search for its insertion point */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       /* entries before the insertion point */
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       /* entries after the insertion point */
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge the old entries with the new ones */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* whatever is left of the new words */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+/*
+ * ts_accum(tsstat, tsvector): fold the words of one tsvector into the
+ * running statistics block.
+ *
+ * Argument 0 is the current statistics block; when it is NULL an empty
+ * block is created.  Argument 1 is the tsvector to add; when it is NULL or
+ * holds no words the current block is returned unchanged.
+ *
+ * Words already present get ndoc incremented and nentry increased by the
+ * word's number of positions (at least 1); this is done in place in the
+ * block that was passed in.  Words not yet present are collected and
+ * handed to formstat(), whose newly allocated block is then returned.
+ *
+ * Existing entries are located either by a linear merge of both lists or,
+ * when stat has at least 100 times as many entries as txt has words, by a
+ * binary search per word.
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt;
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /*
+    * A NULL tsvector has to be rejected before it is detoasted:
+    * detoasting dereferences the pointer.
+    */
+   if ( PG_ARGISNULL(1) || PG_GETARG_POINTER(1)==NULL )
+       PG_RETURN_POINTER(stat);
+
+   txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+
+   /* simple check of correctness */
+   if ( txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word is missing from stat: remember it */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* words sorting after the last entry of stat are all new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* after a hit the loop was left by break, so StopLow < StopHigh */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions in this file.
+ *   cur   index of the next StatEntry to return
+ *   stat  private copy of the statistics block being returned; it is a
+ *         tsstat (ts_setup_firstcall() copies one into it), so it is
+ *         declared as such rather than as the layout-compatible tsvector
+ */
+typedef struct {
+   uint32  cur;
+   tsstat  *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions.
+ *
+ * While the multi-call memory context is current, a StatStorage holding a
+ * private copy of stat is allocated and stored in funcctx->user_fctx, and
+ * the slot and attribute metadata for the "statinfo" row type are attached
+ * to funcctx.  The previous memory context is restored before returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext     callerctx;
+   StatStorage     *storage;
+   TupleDesc            rowdesc;
+
+   callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* iteration state plus a copy of the statistics */
+   storage = palloc( sizeof(StatStorage) );
+   storage->stat = palloc( stat->len );
+   memcpy(storage->stat, stat, stat->len);
+   storage->cur = 0;
+   funcctx->user_fctx = (void*)storage;
+
+   /* description of the rows that will be returned */
+   rowdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(callerctx);
+}
+
+
+/*
+ * Produce the next result row for a set-returning call.
+ *
+ * While entries remain, the current StatEntry is rendered as three C
+ * strings (word, ndoc, nentry), turned into a tuple and returned as a
+ * Datum; the cursor is advanced.  Once all entries have been returned the
+ * stored statistics and the StatStorage are freed and (Datum)0 is
+ * returned, which the callers treat as "done".
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *storage = (StatStorage*)funcctx->user_fctx;
+   StatEntry   *entry;
+   HeapTuple   tuple;
+   Datum       row;
+   char        ndocbuf[16];
+   char        nentrybuf[16];
+   char        *values[3];
+
+   /* nothing left: release the per-query state */
+   if ( !( storage->cur < storage->stat->size ) ) {
+       pfree(storage->stat);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   entry = STATPTR(storage->stat) + storage->cur;
+
+   /* the word is not NUL-terminated in the block, so copy and terminate */
+   values[0] = palloc( entry->len+1 );
+   memcpy( values[0], STATSTRPTR(storage->stat)+entry->pos, entry->len);
+   values[0][entry->len] = '\0';
+
+   sprintf(ndocbuf,"%d",entry->ndoc);
+   values[1] = ndocbuf;
+   sprintf(nentrybuf,"%d",entry->nentry);
+   values[2] = nentrybuf;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   row = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[0]);
+   storage->cur++;
+   return row;
+}
+
+/*
+ * ts_accum_finish(tsstat): set-returning function that hands back the
+ * entries of the given statistics block one row per call.  The first call
+ * copies the block via ts_setup_firstcall(); every call then asks
+ * ts_process_call() for the next row and finishes when it yields (Datum)0.
+ */
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+/* Oid of the tsvector type; InvalidOid until get_ti_Oid() has run */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the Oid of the tsvector type through SPI and store it in tiOid.
+ * Must be called while connected to SPI.  Raises an ERROR when the query
+ * fails, when it returns no row, or when the Oid found is InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /* a row count cannot be negative; "no row" has to be tested as < 1 */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL query given in txt and accumulate word statistics over the
+ * tsvector values it returns.
+ *
+ * The query must produce exactly one column of type tsvector; anything else
+ * raises an ERROR.  Rows are fetched through a cursor 100 at a time, and
+ * every non-NULL value is fed to ts_accum().  Must be called while
+ * connected to SPI.
+ *
+ * Returns the accumulated statistics block (empty when the query returns
+ * no rows).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       /* walk over every row of the batch just fetched */
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum returns its input when no new word was added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+/*
+ * ts_stat(text): set-returning function.  On the first call the text
+ * argument is executed as a query through ts_stat_sql() inside an SPI
+ * connection and the resulting statistics are stored for iteration; every
+ * call then returns the next row from ts_process_call() until it yields
+ * (Datum)0.
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *sql=PG_GETARG_TEXT_P(0);
+       tsstat  *collected;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       collected = ts_stat_sql(sql);
+       PG_FREE_IF_COPY(sql,0);
+       /* copies the statistics into the multi-call context */
+       ts_setup_firstcall(funcctx, collected );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one word, as filled in by ts_stat.c:
+ *   len     length of the word in bytes
+ *   pos     offset of the word's text from STATSTRPTR() of its block
+ *   ndoc    incremented once for every tsvector the word was found in
+ *   nentry  sum of the word's position counts, counting at least 1 per
+ *           tsvector
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Statistics block.  Per the macros in this header, the header fields are
+ * followed by `size` StatEntry items and then by the words' text:
+ *   len   total size of the block in bytes (what CALCSTATSIZE() yields)
+ *   size  number of StatEntry items
+ *   data  start of the variable-length part
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for a tsstat block.  All arguments are parenthesized so
+ * that expressions such as CALCSTATSIZE(a + b, c) expand correctly, and the
+ * header fields are read through tsstat itself.
+ *   STATHDRSIZE         size of the fixed header (len and size)
+ *   CALCSTATSIZE(x, l)  total size for x entries and l bytes of text
+ *   STATPTR(x)          first StatEntry of block x
+ *   STATSTRPTR(x)       start of the text area, right behind the entries
+ *   STATSTRSIZE(x)      number of bytes in the text area
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+-- One row per dictionary: its name, the OIDs of its init and lexize
+-- functions, an option text (the built-in rows below store a stop-word
+-- file path or NULL there) and a free-text comment.
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+-- lexize() has three strict overloads that all return _text: by oid, by
+-- text (C symbol lexize_byname) and with the text argument only (C symbol
+-- lexize_bycurrent).  set_curdict() has two strict overloads: by int and
+-- by text (C symbol set_curdict_byname).
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+-- 'simple' dictionary: dex_init/dex_lexize are registered and their OIDs
+-- are stored in pg_ts_dict; no init option is set.
+-- NOTE(review): the OIDs are looked up by proname only, so each scalar
+-- subquery fails if more than one pg_proc row carries that name.
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+-- Snowball stemmers: 'en_stem' and 'ru_stem' have separate init functions
+-- but share snb_lexize.  DATA_PATH in the init option is a placeholder
+-- that the Makefile's sed rule replaces with $(datadir).
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+-- ISpell dictionary template: registered with a NULL init option; the
+-- commented example at the end of this script shows an option string that
+-- names the dictionary, affix and stop-word files.
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+-- Synonym dictionary: registered with a NULL init option; the commented
+-- example at the end of this script sets it to a synonym file path.
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+-- One row per parser: its name, the OIDs of its start, nexttoken, end,
+-- headline and lextype functions, and a free-text comment.
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+-- token_type() returns rows of the composite type tokentype and parse()
+-- returns rows of tokenout.  Both exist in three strict overloads whose
+-- C symbols are the default, *_byname and *_current; set_curprs() has an
+-- int and a text (set_curprs_byname) overload.
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+-- The five prsd_* functions are registered and their OIDs stored as the
+-- 'default' row of pg_ts_parser.  Note the column order of that table:
+-- start, nexttoken, end, headline, lextype.
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+-- pg_ts_cfg names each configuration together with its parser and an
+-- optional locale; pg_ts_cfgmap lists, per configuration and token alias,
+-- an array of dictionary names.  Three configurations are created here;
+-- 'simple' is inserted without a locale.
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+-- Variable-length type with extended storage, its I/O functions, and the
+-- functions that work on it: length, three to_tsvector overloads (by oid,
+-- by text, current), strip, setweight and concat, the latter also exposed
+-- as the || operator.
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+-- Variable-length tsquery type with its I/O functions, querytree() (C
+-- symbol tsquerytree) and three to_tsquery overloads (by oid, by text,
+-- current).
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+-- The @@ operator is defined for both argument orders: tsvector @@
+-- tsquery calls exectsq, tsquery @@ tsvector calls rexectsq.  Each
+-- definition names @@ as its commutator.
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+-- Trigger function implemented by the C symbol tsearch2 of the module.
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+-- rank() and rank_cd() each come with and without a leading extra
+-- argument (float4[] for rank, int4 for rank_cd) and with and without a
+-- trailing int4; the variants without the leading argument use the *_def
+-- C symbols.  headline() is offered by configuration oid, by
+-- configuration name (headline_byname) and for the current configuration
+-- (headline_current), each with and without a trailing text argument that
+-- shares the same C symbol.
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+-- GiST support for tsvector: the key type gtsvector with its I/O
+-- functions, the seven support functions, and the default operator class
+-- that indexes tsvector @@ tsquery (marked RECHECK) and stores gtsvector
+-- keys.
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+-- statinfo is the row type returned by stat(text), which is implemented
+-- by the C function ts_stat.  The commented-out statements below would
+-- define an aggregate-based variant (tsstat type, ts_accum,
+-- ts_accum_finish); they are not executed.
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+-- reset_tsearch() takes no arguments and returns void.
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+-- get_covers(tsvector, tsquery) returns its result as text.
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of lexeme positions in that string and their lengths
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>              /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * Version-1 call convention records and prototypes for the SQL-callable
+ * functions defined in this file.
+ */
+
+/* text input / output of the tsvector type */
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+/* text -> tsvector: by configuration id, by current configuration, by configuration name */
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+/* row-level BEFORE trigger that fills a tsvector column */
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+/* number of entries stored in a tsvector */
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending position.
+ *
+ * Returns <0, 0 or >0.  Equal positions must compare as 0: a comparator
+ * that answers "greater" for both (a,b) and (b,a) is inconsistent and
+ * gives qsort() undefined behaviour.  The relative order of equal
+ * positions does not matter to uniquePos(), which folds them together.
+ */
+static int
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ * Entries sharing a position are merged and keep the highest weight.
+ * Compaction stops early once MAXNUMPOS items are kept or the last kept
+ * position is the largest storable one (MAXENTRYPOS-1).
+ * Returns the number of items left at the front of a[].
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   int4    last = 0;       /* index of the most recently kept item */
+   int4    i;
+
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (i = 1; i < l; i++) {
+       if ( a[i].pos == a[last].pos ) {
+           /* same position again: only a heavier weight is of interest */
+           if ( a[i].weight > a[last].weight )
+               a[last].weight = a[i].weight;
+           continue;
+       }
+
+       last++;
+       a[last].pos = a[i].pos;
+       a[last].weight = a[i].weight;
+       if ( last >= MAXNUMPOS-1 || a[last].pos == MAXENTRYPOS-1 )
+           break;
+   }
+   return last + 1;
+}
+
+/* Lexeme text buffer consulted by compareentry(); set by uniqueentry() before sorting */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN items: shorter lexemes sort first,
+ * lexemes of equal length are ordered by their bytes in BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *ea = (const WordEntryIN *) a;
+   const WordEntryIN *eb = (const WordEntryIN *) b;
+
+   if ( ea->entry.len != eb->entry.len )
+       return ( ea->entry.len > eb->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[ea->entry.pos],
+                  &BufferStr[eb->entry.pos],
+                  ea->entry.len);
+}
+
+/*
+ * Sort the l parsed entries in a[] and merge entries with the same lexeme,
+ * concatenating and then de-duplicating their position lists.
+ *
+ *  a          entries; entry.pos/entry.len address the lexeme text in buf,
+ *             pos (when entry.haspos) is a palloc'd array whose slot 0
+ *             holds the number of positions stored in the following slots
+ *  l          number of entries in a[]
+ *  buf        buffer holding the lexeme text
+ *  outbuflen  out: number of bytes the surviving entries need in the data
+ *             area of a tsvector: SHORTALIGN(len) per lexeme plus, for an
+ *             entry with positions, the count slot and the positions
+ *
+ * Returns the number of unique entries left at the front of a[].
+ *
+ * The size is computed from zero and includes the count slot, so it matches
+ * exactly what tsvector_in() copies; piling the sum on top of the caller's
+ * scratch-buffer size while omitting the count slot could under-estimate
+ * the space for many short lexemes with positions.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+   int4        need = 0;
+
+   if (l <= 0) {
+       *outbuflen = 0;
+       return 0;
+   }
+
+   res = a;
+   ptr = a + 1;
+   if (l > 1) {
+       BufferStr = buf;
+       qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+   }
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* a new lexeme starts: *res is complete, account for its space */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               need += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           need += SHORTALIGN(res->entry.len);
+           res++;
+           if ( res != ptr )   /* memcpy() must not be given overlapping areas */
+               memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* same lexeme as *res: move its positions over to *res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* *res had no positions yet: simply adopt the array */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* account for the last unique entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       need += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   need += SHORTALIGN(res->entry.len);
+
+   *outbuflen = need;
+   return res + 1 - a;
+}
+
+/*
+ * States of the tokenizer in gettoken_tsvector():
+ *   WAITWORD      skipping blanks, waiting for the first character of a lexeme
+ *   WAITENDWORD   inside an unquoted lexeme
+ *   WAITNEXTCHAR  a backslash was seen, the next character is taken literally
+ *   WAITENDCMPLX  inside a quoted ('...') lexeme
+ *   WAITPOSINFO   quoted lexeme closed, an optional ':' may follow
+ *   INPOSINFO     after ':' or ',', a position number is expected
+ *   WAITPOSDELIM  after a position number: weight letter, ',' or end of entry
+ */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Double the word buffer when the write cursor is within one byte of its
+ * end.  Expects a variable named 'state' (TI_IN_STATE *) in scope;
+ * state->curpos is re-based onto the reallocated buffer.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * gettoken_tsvector - read the next lexeme (and its optional position
+ * list) from state->prsbuf.
+ *
+ * On success returns 1: the NUL-terminated lexeme is in state->word,
+ * state->curpos points at its terminator, and, if a position list was
+ * parsed, state->alen is non-zero and state->pos holds the positions
+ * (slot 0 carries their count).  Returns 0 at the end of the input.
+ * Malformed input is reported with elog(ERROR).
+ *
+ * When state->oprisdelim is set, characters accepted by ISOPERATOR() and
+ * ':' end a lexeme and no position list is parsed.
+ *
+ * Characters are cast to unsigned char before being handed to the
+ * <ctype.h> classifiers: passing a negative plain char is undefined.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;   /* state to resume after an escaped char */
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               /* delimiter is left unconsumed for the next call / the caller */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* step over the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               /* slot 0 of state->pos counts the positions stored after it */
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi() reads the whole number; its remaining digits are skipped in WAITPOSDELIM */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * tsvector_in - input function: parse the text form of a tsvector.
+ *
+ * Tokens are collected into a scratch buffer (tmpbuf) and an array of
+ * WordEntryIN, sorted and de-duplicated by uniqueentry(), and then laid
+ * out as: header, WordEntry array, lexeme text with the position lists
+ * interleaved.
+ *
+ * Errors (elog ERROR): a lexeme of MAXSTRLEN bytes or more, or lexeme
+ * data whose offset does not fit the 20-bit WordEntry.pos field.  The
+ * offset tests use ">=" because MAXSTRPOS itself (1<<20) is already not
+ * representable in that field.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array and the scratch text buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           /* take over the position array built by the tokenizer */
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* empty vector: do not size it by the scratch buffer */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /* final offsets include the interleaved position lists: re-check the range */
+       if ( cur - STRPTR(in) >= MAXSTRPOS )
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* count slot plus the positions are stored right behind the lexeme */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * tsvector_length - number of entries (WordEntry items) in the vector.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* fetch the count before the detoasted copy may be released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * tsvector_out - output function: build the text form of a tsvector,
+ *     'lexeme':pos[weight],pos[weight] 'lexeme' ...
+ *
+ * Quotes and backslashes inside a lexeme are escaped with a backslash.
+ * The input parser treats a backslash as an escape character, so an
+ * unescaped one in the output would be swallowed when the text is read
+ * back.
+ *
+ * Buffer budget: 2 quotes and up to 2 bytes per lexeme byte (escapes),
+ * a blank between entries, and 7 bytes per position (up to 5 digits,
+ * weight letter, comma).  The comma not needed after the last position
+ * pays for the ':'; one byte is reserved for the terminating NUL.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += ptr[i].len*2 /*for escape */;
+       if ( ptr[i].haspos )
+           lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           /* room for the escape is already part of the 2*len budget */
+           if (*curin == '\'' || *curin == '\\')
+               *curout++ = '\\';
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               /* weight 0 (D) is the default and is not printed */
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD items: by length, then by bytes of the
+ * word, then by position.  Fully equal items compare as 0 so that the
+ * comparator stays consistent, as qsort() requires.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   if (((WORD *) a)->len == ((WORD *) b)->len) {
+       int res = strncmp(
+                      ((WORD *) a)->word,
+                      ((WORD *) b)->word,
+                      ((WORD *) b)->len);
+       if ( res==0 ) {
+           if ( ((WORD *) a)->pos.pos == ((WORD *) b)->pos.pos )
+               return 0;
+           return ( ((WORD *) a)->pos.pos > ((WORD *) b)->pos.pos ) ? 1 : -1;
+       }
+       return res;
+   }
+   return (((WORD *) a)->len > ((WORD *) b)->len) ? 1 : -1;
+}
+
+/*
+ * Give w a freshly allocated two-slot position list: slot 0 is the
+ * number of positions (1), slot 1 the already clamped position firstpos.
+ * The caller must read w->pos.pos before calling, since pos.apos is
+ * assigned here (the two appear to share storage -- confirm against the
+ * WORD declaration).
+ */
+static void
+startPosList(WORD * w, int firstpos)
+{
+   w->alen=2;
+   w->pos.apos=(uint16*)palloc( sizeof(uint16)*w->alen );
+   w->pos.apos[0]=1;
+   w->pos.apos[1]=firstpos;
+}
+
+/*
+ * Sort the l parsed words in a[] and merge equal words in place,
+ * gathering their positions into one list per word (pos.apos, count in
+ * slot 0).  The text of merged duplicates is pfree'd.  A list stops
+ * growing at MAXNUMPOS-1 positions or once its last position is
+ * MAXENTRYPOS-1.  Returns the number of unique words left at the front
+ * of a[].
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *cur,
+              *last;
+   int firstpos;
+
+   if (l > 1)
+       qsort((void *) a, l, sizeof(WORD), compareWORD);
+
+   firstpos=LIMITPOS(a->pos.pos);
+   startPosList(a, firstpos);
+
+   last = a;
+   for (cur = a + 1; cur - a < l; cur++)
+   {
+       if (cur->len == last->len &&
+           strncmp(cur->word, last->word, last->len) == 0)
+       {
+           /* duplicate of *last: drop its text, keep its position if there is room */
+           pfree(cur->word);
+           if ( last->pos.apos[0] >= MAXNUMPOS-1 || last->pos.apos[ last->pos.apos[0] ] == MAXENTRYPOS-1 )
+               continue;
+           if ( last->pos.apos[0]+1 >= last->alen ) {
+               last->alen*=2;
+               last->pos.apos=(uint16*)repalloc( last->pos.apos, sizeof(uint16)*last->alen );
+           }
+           last->pos.apos[ last->pos.apos[0]+1 ] = LIMITPOS(cur->pos.pos);
+           last->pos.apos[0]++; 
+           continue;
+       }
+
+       /* a new word: move it down next to the previous unique one */
+       last++;
+       last->len = cur->len;
+       last->word = cur->word;
+       firstpos=LIMITPOS(cur->pos.pos);
+       startPosList(last, firstpos);
+   }
+
+   return last + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * Builds a tsvector from the words collected in prs: the words are
+ * de-duplicated by uniqueWORD(), then each lexeme is copied into the
+ * data area followed by its position list (uint16 count, then one
+ * WordEntryPos per position, all with weight 0).
+ *
+ * prs->words, the word strings and the position arrays are pfree'd.
+ * Raises an error when a lexeme offset does not fit the 20-bit
+ * WordEntry.pos field; the test is ">=" because MAXSTRPOS itself (1<<20)
+ * is already not representable there.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   /* first pass: how many bytes does the data area need? */
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   /* second pass: fill the entry array and the data area */
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* count first, then the positions right behind it */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text) - parse the text with the configuration
+ * found by findcfg() for argument 0 and return the resulting tsvector;
+ * an empty vector is returned when the parser yields no words.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *src = PG_GETARG_TEXT_P(1);
+   TSCfgInfo *cfg=findcfg(PG_GETARG_INT32(0)); 
+   PRSTEXT     prs;
+   tsvector       *result;
+
+   /* start with room for 32 words */
+   prs.pos = 0;
+   prs.curwords = 0;
+   prs.lenwords = 32;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(src), VARSIZE(src) - VARHDRSZ);
+   PG_FREE_IF_COPY(src, 1);
+
+   if (prs.curwords == 0) {
+       /* nothing indexable: hand back a header-only vector */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   } else
+       result = makevalue(&prs);
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg_name, text) - translate the configuration name with
+ * name2id_cfg() and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname=PG_GETARG_TEXT_P(0);
+   Datum       cfgid;
+   Datum       result;
+
+   cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);   
+}
+
+/*
+ * to_tsvector(text) - delegate to to_tsvector() with the configuration
+ * id reported by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(result);   
+}
+
+/*
+ * Look up a function called fname that takes one argument and return
+ * the oid of the first candidate whose argument type is text, or
+ * InvalidOid when there is none.  The candidate list is freed.
+ */
+static Oid
+findFunc(char *fname) {
+   List *names=makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(names, 1);
+   Oid found = InvalidOid;
+
+   freeList(names);
+
+   while ( cand != NULL ) {
+       FuncCandidateList next = cand->next;
+
+       /* remember only the first match, but keep walking to free the list */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * tsearch2(tsvector_field, text_field1, ...) - BEFORE INSERT/UPDATE row
+ * trigger that parses the listed character columns with the current
+ * configuration and stores the resulting tsvector in tsvector_field.
+ *
+ * An argument that is not a column name is looked up with findFunc();
+ * the function found is applied to the value of every column named
+ * after it (until another function name replaces it).  NULL columns are
+ * skipped; columns of a non-character type are skipped with a WARNING.
+ * Returns the modified tuple.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   /* refuse anything but a row-level BEFORE INSERT/UPDATE invocation */
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* the tuple about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* first argument names the tsvector column to fill */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: must be a function; it stays in effect for the columns that follow */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * If detoasting made no copy, point txt_toasted at the function
+            * result so that the comparison after parsing does not pfree it.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free only a copy produced by detoasting */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a header-only (empty) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Descriptor of one lexeme in a tsvector, packed into 32 bits:
+ *   haspos  set when a position list follows the lexeme text
+ *   len     length of the lexeme in bytes
+ *   pos     byte offset of the lexeme text from STRPTR() of the vector
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/*
+ * Exclusive bounds of the bit fields above: the largest storable len is
+ * MAXSTRLEN-1 and the largest storable pos is MAXSTRPOS-1.
+ */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme, packed into 16 bits:
+ *   weight  0..3; the text form prints 3, 2, 1 as A, B, C and omits 0
+ *   pos     position of the occurrence, at most MAXENTRYPOS-1
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+/* exclusive bound of the 14-bit pos field */
+#define MAXENTRYPOS    (1<<14)
+/* limit used when collecting the positions of a single lexeme */
+#define MAXNUMPOS  256
+/* clamp a position to the largest storable value */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * The tsvector value itself:
+ *   len   total size of the value in bytes
+ *   size  number of WordEntry items
+ *   data  the WordEntry array (ARRPTR), followed by the lexeme text and
+ *         position lists (STRPTR)
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Layout helpers for a tsvector value.  Every macro argument is
+ * parenthesized so that expressions such as "a + b" or "p + 1" expand
+ * as intended.
+ *
+ *   DATAHDRSIZE        size of the len/size header
+ *   CALCDATASIZE(x,l)  total size for x entries and l bytes of data area
+ *   ARRPTR(x)          start of the WordEntry array
+ *   STRPTR(x)          start of the data area (behind the array)
+ *   STRSIZE(x)         size of the data area in bytes
+ *   _POSDATAPTR(x,e)   the uint16 position count stored behind lexeme e
+ *   POSDATALEN(x,e)    number of positions of e, 0 when e has none
+ *   POSDATAPTR(x,e)    first WordEntryPos of e
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Entry as collected while parsing text input: the packed descriptor
+ * plus a separately allocated position array whose slot 0 holds the
+ * number of positions stored in the following slots.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * State of the tokenizer gettoken_tsvector().
+ */
+typedef struct
+{
+   char       *prsbuf;     /* read cursor in the input string */
+   char       *word;       /* buffer receiving the current lexeme */
+   char       *curpos;     /* write cursor inside word */
+   int4        len;        /* allocated size of word */
+   int4        state;      /* current parser state */
+   int4        alen;       /* allocated slots in pos, 0 if no positions were read */
+   WordEntryPos    *pos;   /* positions of the current lexeme, count in slot 0 */
+   bool        oprisdelim; /* operator characters and ':' end a lexeme */
+}  TI_IN_STATE;
+
+/* returns 1 when a lexeme was read into state->word, 0 at end of input */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>              /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * Version-1 call convention records and prototypes for the SQL-callable
+ * tsvector operations defined in this file.
+ */
+
+/* copy of a tsvector without position information */
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+/* copy of a tsvector with one weight assigned to all positions */
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+/* merge of two tsvectors */
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector) - return a copy of the vector that keeps every lexeme
+ * but drops all position lists (haspos is cleared on every entry).
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* the data area only needs the (aligned) lexeme text */
+   for(i=0;i<in->size;i++) 
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char") - return a copy of the vector in which
+ * every position carries the given weight.  The weight letter is
+ * case-insensitive: a=3, b=2, c=1, d=0; anything else raises an error.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* tolower() is only defined for unsigned char values (and EOF) */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;    
+   while(i--) {
+       /* overwrite the weight of every position of this entry */
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two lexemes that may live in different vectors: ptra/ptrb are
+ * the data areas that a->pos / b->pos are offsets into.  Shorter lexemes
+ * order first; equal lengths are ordered by their bytes.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position list
+ * of destptr (an entry of dest), shifting each one by maxpos and
+ * clamping it with LIMITPOS().
+ *
+ * The destination list is treated as empty when destptr->haspos is not
+ * set.  Appending stops when the source is exhausted, when the list
+ * holds MAXNUMPOS positions, or when its last position is already the
+ * largest storable one (MAXENTRYPOS-1).  destptr->haspos is set if
+ * anything was appended.
+ *
+ * Returns the number of positions appended.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector) - merge two vectors.
+ *
+ * The entries of both inputs are walked in compareEntry() order and
+ * merged into one list.  Positions coming from the second vector are
+ * shifted by the largest position found in the first one (add_pos()).
+ * The result is allocated with the summed size of both inputs; its
+ * size and len are trimmed once the real number of entries is known.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* largest position used in in1: the offset applied to in2's positions */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* upper bound: as if no lexeme were shared between the inputs */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   /* data area placed for the maximal entry count; moved down at the end if needed */
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* in1's positions are copied unchanged, count included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* in2's positions are shifted by maxpos; none added means no list at all */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one entry, position lists combined */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count slot is already in place, only the added positions take space */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* remaining entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* remaining entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /* trim to the real entry count; STRPTR() depends on size, so the data area may have to move */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- Be careful!
+-- This script drops all indexes, triggers and columns that use the types
+-- defined in tsearch2.sql.
+
+
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the token
+ * type number from deflex.h (LATWORD == 1 ... HTMLENTITY == 23).
+ * Entry 0 is an empty placeholder because token numbering starts at 1.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token type number from
+ * deflex.h in the same way as lex_descr[].  Entry 0 is an empty
+ * placeholder because token numbering starts at 1.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/* remember: keep LASTNUM equal to the highest token type number below */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Interface of the flex-generated default word parser (parser.l).
+ */
+
+/*
+ * Text of the most recently scanned token.  The variable itself is defined
+ * in parser.l, so the header only declares it; a plain "char *token;" here
+ * would define it again in every file that includes this header.
+ */
+extern char      *token;
+/*
+ * Length recorded for the most recently scanned token.
+ * NOTE(review): this is still a tentative definition repeated in every
+ * includer.  parser.l has no definition of its own for it, so it cannot be
+ * turned into an 'extern' declaration here without adding one there.
+ */
+int            tokenlen;
+/* Scan the next token and return its type number (see deflex.h). */
+int            tsearch2_yylex(void);
+/* Start scanning 'limit' bytes of the given string. */
+void       start_parse_str(char *, int);
+/* Start scanning the given file handle; a limit of 0 means unlimited. */
+void       start_parse_fh(FILE *, int);
+/* Release the scan buffer set up by start_parse_str()/start_parse_fh(). */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/* Avoid exit() on fatal scanner errors */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* postgres allocation function */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to token */
+char *s     = NULL;  /* to return WHOLE hyphenated-word */
+
+YY_BUFFER_STATE buf = NULL; /* buffer to parse; it need for parse from string */
+
+int lrlimit = -1;  /* for limiting read from filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* for limiting read from filehandle */
+
+/*
+ * Redefine flex's YY_INPUT so that reads from a file handle can be limited.
+ *
+ * Interactive buffers are read one character at a time, up to and including
+ * a newline.  For other buffers lrlimit is the number of bytes that may
+ * still be requested: 0 reports end of input (YY_NULL), a positive value
+ * caps the fread() size and is decremented by the amount requested, and a
+ * negative value means no limit.
+ *
+ * Despite its indentation, the fread() test runs for both the limited and
+ * the unlimited case; only "bytestoread = max_size" belongs to the inner
+ * else.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: release the saved copy of the whole
+ * hyphenated word / URL (if any) and delete the current scan buffer.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Start parsing 'limit' bytes of 'str'.  A scan buffer left over from a
+ * previous parse is cleaned up first.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL )
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Start parsing from file handle 'fh'.  'limit' is stored in lrlimit as the
+ * read limit; a limit of 0 is stored as -1, i.e. unlimited read.  A scan
+ * buffer left over from a previous parse is cleaned up first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL )
+       end_parse();
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser stored in the pg_ts_parser
+ * row whose oid is 'id'.
+ *
+ * *prs is zeroed first.  The lookup query is prepared once and the saved
+ * plan is reused by later calls.  Result columns 1, 2, 3 and 5 (prs_start,
+ * prs_nexttoken, prs_end, prs_headline) are resolved with fmgr_info_cxt()
+ * in TopMemoryContext; column 4 (prs_lextype) is kept as a plain Oid.
+ * Failures are reported through ts_error(ERROR, ...).
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* an Oid is unsigned, so it is printed with %u rather than %d */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/*
+ * Cache of the parser descriptions loaded so far (see findprs()).
+ */
+typedef struct {
+   /* entry found by the most recent lookup; findprs() checks it first */
+   WParserInfo *last_prs;
+   /* number of entries of 'list' that are in use */
+   int     len;
+   /* number of entries 'list' has room for */
+   int     reallen;
+   /* entries kept sorted by prs_id so that bsearch() can be used */
+   WParserInfo *list;
+   /* parser name -> oid cache used by name2id_prs() */
+   SNMap       name2id_map;
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget every cached parser description: frees the name -> oid map and
+ * the entry array, then zeroes the whole cache structure.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &PList.name2id_map );
+   if ( PList.list != NULL )
+       free( PList.list );
+   memset( &PList, 0, sizeof(PList) );
+}
+
+/*
+ * qsort()/bsearch() comparator that orders WParserInfo entries by prs_id.
+ *
+ * prs_id is an Oid, which is unsigned.  Returning the difference of two
+ * ids converted to int gives the wrong sign once they are more than
+ * INT_MAX apart, so the ids are compared explicitly instead.
+ *
+ * Returns a negative value, zero or a positive value when a's id is less
+ * than, equal to or greater than b's id.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached description of parser 'id', loading it with
+ * init_prs() the first time the parser is requested.
+ *
+ * Lookup order: the entry returned last time, then a binary search of the
+ * sorted cache, and finally a fresh load that is appended to the cache,
+ * which is then re-sorted.  The returned pointer refers to an element of
+ * the cache array.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo *newprs;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       WParserInfo key;
+       key.prs_id=id;
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* last chance */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /*
+    * Fill the free slot through a local pointer and keep last_prs NULL
+    * meanwhile.  If init_prs() bails out through ts_error(ERROR, ...)
+    * (presumably without returning here), the cache must not be left
+    * pointing at a slot that was never registered in the list.
+    */
+   PList.last_prs=NULL;
+   newprs=&(PList.list[PList.len]);
+   init_prs(id, newprs);
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return findprs(id); /* qsort changed order!! */
+}
+
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * The name -> oid map of the parser cache is consulted first.  On a miss
+ * the oid is read from pg_ts_parser with a query that is prepared once and
+ * then reused, and the result is added to the map.  An unknown name is
+ * reported through ts_error(ERROR, ...).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid id;
+
+   /* answer from the cache when possible */
+   id=findSNMap_t( &(PList.name2id_map), name );
+   if ( id != 0 )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( !( SPI_processed > 0 ) )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/*
+ * Per-query state of the token_type*() set-returning functions.
+ */
+typedef struct {
+   /* index of the next entry of 'list' to return */
+   int     cur;
+   /* array returned by the parser's lextype function; an entry with lexid 0 ends it */
+   LexDescr    *list;
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type*() functions.
+ *
+ * Looks up parser 'prsid', calls its lextype function to obtain the list
+ * of token types, and stores that list, together with the slot and
+ * attribute metadata for the "tokentype" row type, in funcctx.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   TypeStorage     *st;
+   WParserInfo *prs = findprs(prsid); 
+
+   /* everything allocated from here on is allocated in the multi-call context */
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->cur=0;
+   st->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
+   );
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the token_type*() functions.
+ *
+ * Builds a (lexid, alias, descr) tuple from the next entry of the stored
+ * list and returns it.  The alias and descr strings of that entry are
+ * freed once the tuple is built.  When the list is missing or its
+ * terminating entry (lexid 0) is reached, the state is freed and
+ * (Datum)0 is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *st=(TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   Datum       result;
+   char        *values[3];
+   char        txtid[16];
+   HeapTuple   tuple;
+
+   /* nothing (more) to return: release the state */
+   if ( !st->list || !st->list[st->cur].lexid ) {
+       if ( st->list )
+           pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = &(st->list[st->cur]);
+   sprintf(txtid,"%d",entry->lexid);
+   values[0]=txtid;
+   values[1]=entry->alias;
+   values[2]=entry->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[1]);
+   pfree(values[2]);
+   st->cur++;
+   return result;
+}
+
+/*
+ * Set-returning function: one row per token type of the parser whose oid
+ * is given as argument 0.
+ */
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) { 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* process_call() returns (Datum)0 once every token type has been returned */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Set-returning function: one row per token type of the parser whose name
+ * is given as argument 0.
+ */
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* the name is only needed to resolve the parser oid */
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* process_call() returns (Datum)0 once every token type has been returned */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Set-returning function: one row per token type of the current parser.
+ * If no current parser has been chosen yet, the parser named "default"
+ * becomes the current one.
+ */
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* process_call() returns (Datum)0 once every token type has been returned */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * Make the parser whose oid is given as argument 0 the current parser.
+ */
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid prsid = PG_GETARG_OID(0);
+
+   /* look the parser up first; the result itself is not needed here */
+   findprs(prsid);
+   current_parser_id = prsid;
+   PG_RETURN_VOID();
+}
+
+/*
+ * Make the parser whose name is given as argument 0 the current parser;
+ * the name is resolved to an oid and handed to set_curprs().
+ */
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+   text *name = PG_GETARG_TEXT_P(0);
+   Oid prsid = name2id_prs(name);
+
+   DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/*
+ * One token produced by a parser run (see prs_setup_firstcall()).
+ */
+typedef struct {
+   /* token type number returned by the parser's getlexeme function */
+   int type;
+   /* NUL-terminated copy of the token text */
+   char    *lexem;
+} LexemEntry;
+
+/*
+ * Per-query state of the parse*() set-returning functions.
+ */
+typedef struct {
+   /* index of the next entry of 'list' to return */
+   int cur;
+   /* allocated size of 'list' while parsing, number of stored tokens afterwards */
+   int len;
+   /* tokens collected by prs_setup_firstcall() */
+   LexemEntry  *list;
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse*() functions.
+ *
+ * Runs parser 'prsid' over the whole of 'txt' at once: the parser's start
+ * function is called with the text data and its length, its getlexeme
+ * function is called until it returns 0, and its end function is called
+ * afterwards.  Every token is copied (NUL-terminated) into a list kept in
+ * the multi-call memory context; the list, together with the slot and
+ * attribute metadata for the "tokenout" row type, is stored in funcctx.
+ *
+ * prsid is an Oid, like the parameter of setup_firstcall() and findprs();
+ * it used to be declared int, which forced every caller's Oid through a
+ * signed conversion.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   /* everything allocated from here on is allocated in the multi-call context */
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* double the list when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* the parser hands back pointer + length, so make a terminated copy */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from now on len is the number of tokens and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the parse*() functions.
+ *
+ * Builds a (type, lexem) tuple from the next stored token and returns it;
+ * the token text is freed once the tuple is built.  When all tokens have
+ * been returned the state is freed and (Datum)0 is returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *st=(PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   Datum       result;
+   char        *values[2];
+   char        tid[16];
+   HeapTuple   tuple;
+
+   /* nothing more to return: release the state */
+   if ( !( st->cur < st->len ) ) {
+       if ( st->list )
+           pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = &(st->list[st->cur]);
+   sprintf(tid,"%d",entry->type);
+   values[0]=tid;
+   values[1]=entry->lexem;
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[1]);
+   st->cur++;
+   return result;
+}
+
+           
+
+/*
+ * Set-returning function: parses the text given as argument 1 with the
+ * parser whose oid is given as argument 0 and returns one row per token.
+ */
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* the whole text is parsed here; later calls only hand out the tokens */
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* prs_process_call() returns (Datum)0 once every token has been returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Set-returning function: parses the text given as argument 1 with the
+ * parser whose name is given as argument 0 and returns one row per token.
+ */
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* the whole text is parsed here; later calls only hand out the tokens */
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* prs_process_call() returns (Datum)0 once every token has been returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * Set-returning function: parses the text given as argument 0 with the
+ * current parser and returns one row per token.  If no current parser has
+ * been chosen yet, the parser named "default" becomes the current one.
+ */
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       /* the whole text is parsed here; later calls only hand out the tokens */
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* prs_process_call() returns (Datum)0 once every token has been returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * headline(cfg oid, text, query [, options])
+ *
+ * Parses the text with hlparsetext() under the given configuration, lets
+ * the headline function of that configuration's parser process the parsed
+ * words, and returns the text built by genhl().  The options argument is
+ * optional; NULL is passed on when it is absent.
+ */
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   /* callers without options either pass fewer arguments or a NULL pointer */
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   /* initial size of the word array handed to hlparsetext() */
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /* NOTE(review): presumably allocated by the parser's headline function - confirm */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+/*
+ * headline(cfg name, text, query [, options])
+ *
+ * Resolves the configuration name to an oid and forwards the call to
+ * headline(); a NULL pointer is passed for a missing options argument.
+ */
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg=PG_GETARG_TEXT_P(0);
+
+   Datum out=DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);   
+}
+
+/*
+ * headline(text, query [, options])
+ *
+ * Forwards the call to headline() with the configuration returned by
+ * get_currcfg(); a NULL pointer is passed for a missing options argument.
+ */
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   PG_RETURN_DATUM(DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
+   ));
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/*
+ * Description of one parser, as loaded from pg_ts_parser by init_prs().
+ */
+typedef struct {
+   /* oid of the pg_ts_parser row */
+   Oid prs_id;
+   /* lookup info for the prs_start function */
+   FmgrInfo start_info;
+   /* lookup info for the prs_nexttoken function */
+   FmgrInfo getlexeme_info;
+   /* lookup info for the prs_end function */
+   FmgrInfo end_info;
+   /* lookup info for the prs_headline function */
+   FmgrInfo headline_info;
+   /* oid of the prs_lextype function (called by oid, not through FmgrInfo) */
+   Oid lextype;
+   /* value returned by the start function, passed to getlexeme and end */
+   void *prs;
+} WParserInfo;
+
+void init_prs(Oid id, WParserInfo *prs);
+WParserInfo* findprs(Oid id);
+Oid name2id_prs(text *name);
+void   reset_prs(void);
+
+
+/*
+ * Description of one token type, as returned in an array by a parser's
+ * lextype function.  An entry with lexid 0 terminates the array.
+ */
+typedef struct {
+   /* token type number */
+   int lexid;
+   /* short alias of the token type */
+   char    *alias;
+   /* human-readable description of the token type */
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+/*
+ * prsd_lextype: list the token types of the default parser.
+ *
+ * Returns a palloc'd array of LASTNUM+1 LexDescr entries.  Entry k
+ * (0-based) describes token type k+1, with alias and description copied
+ * (pstrdup) from tok_alias[] and lex_descr[].  The last entry has
+ * lexid 0 and serves as the terminator; its other fields are not set.
+ */
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *list;
+   int idx;
+
+   list = (LexDescr*) palloc( sizeof(LexDescr) * (LASTNUM+1) );
+
+   for(idx=0; idx<LASTNUM; idx++) {
+       int lexid = idx+1;  /* token ids are 1-based */
+
+       list[idx].lexid = lexid;
+       list[idx].alias = pstrdup(tok_alias[lexid]);
+       list[idx].descr = pstrdup(lex_descr[lexid]);
+   }
+
+   /* terminator */
+   list[LASTNUM].lexid = 0;
+
+   PG_RETURN_POINTER(list);
+}
+
+/*
+ * prsd_start: begin parsing a buffer with the default parser.
+ * Argument 0 is a pointer to the text, argument 1 its length; both are
+ * handed to start_parse_str().  Always returns a NULL pointer.
+ */
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char *buf = (char*)PG_GETARG_POINTER(0);
+
+   start_parse_str( buf, PG_GETARG_INT32(1) );
+
+   PG_RETURN_POINTER(NULL);
+}
+
+/*
+ * prsd_getlexeme: fetch the next token from the default parser.
+ *
+ * Argument 0 (parser state) is ignored.  Argument 1 is a char** that
+ * receives the value of the global `token`, argument 2 an int* that
+ * receives the global `tokenlen`.  Returns the value of
+ * tsearch2_yylex() as an int32.
+ */
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tokptr = (char**)PG_GETARG_POINTER(1);
+   int *lenptr = (int*)PG_GETARG_POINTER(2);
+   int  toktype;
+
+   /* run the lexer first: token/tokenlen are read only afterwards */
+   toktype = tsearch2_yylex();
+
+   *tokptr = token;
+   *lenptr = tokenlen;
+
+   PG_RETURN_INT32(toktype);
+}
+
+/*
+ * prsd_end: finish a parse started by prsd_start().
+ * The parser-state argument is ignored; this only calls end_parse().
+ */
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   end_parse();
+
+   PG_RETURN_VOID();
+}
+
+#define LEAVETOKEN(x)  ( (x)==12 ) /* token-type predicates; the numeric ids are presumably the default parser's token types -- TODO confirm against wordparser/deflex.h */
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 ) /* not referenced in the code visible here */
+#define ENDPUNCTOKEN(x)    ( (x)==12 ) /* same test as LEAVETOKEN */
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 ) /* used below only as part of NOENDTOKEN */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 ) /* prsd_headline() sets the `replace` flag on words of these types */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) ) /* types that prsd_headline() does not count as words */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) ) /* types prsd_headline() avoids as the last word of a headline */
+
+typedef struct { /* window of headline words handed to checkcondition_HL() through TS_execute() */
+   HLWORD  *words; /* first word of the window */
+   int len; /* number of words in the window */
+} hlCheck;
+
+/*
+ * checkcondition_HL: TS_execute() callback.
+ * checkval points to an hlCheck window; returns true when any word in
+ * that window refers to the query item `val`.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window = (hlCheck*)checkval;
+   int k = 0;
+
+   while ( k < window->len ) {
+       if ( window->words[k].item == val )
+           return true;
+       k++;
+   }
+
+   return false;
+}
+
+
+/*
+ * hlCover: find the next span of words ("cover") that satisfies the query.
+ *
+ * On entry *p is the index in prs->words to start searching from.
+ * On success returns true with *p and *q set to the first and last word
+ * index of the cover.  Returns false when no cover is found.
+ *
+ * The first pass takes, for every VAL item of the query, its first
+ * occurrence at or after the start index and keeps the largest such
+ * index in *q.  The second pass walks back from *q and keeps in *p the
+ * smallest index of the nearest occurrence of each item.  The candidate
+ * window is then checked with TS_execute(); if it does not satisfy the
+ * query the search restarts one word further on.
+ *
+ * Note: *q==0 is used as "nothing found", so a match at index 0 alone is
+ * not recognised as the right edge.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* pass 1: right edge of the cover */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q==0 )
+       return false;
+
+   /* pass 2: left edge of the cover, scanning backwards from *q */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* window does not satisfy the query: retry from the next word */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+/*
+ * prsd_headline: choose the fragment of a parsed text to show as headline.
+ *
+ * Arguments: 0 - HLPRSTEXT* with the parsed words, 1 - options text (may be
+ * NULL), 2 - the query.  Recognised options (case-insensitive keys):
+ * MaxWords, MinWords, ShortWord, StartSel, StopSel.
+ *
+ * Every cover returned by hlCover() is stretched or shrunk towards the
+ * MinWords..MaxWords range, and the candidate containing the most distinct
+ * query words that also ends on an acceptable word (not NOENDTOKEN and
+ * longer than ShortWord) is kept.  If no cover exists, the text from the
+ * first word up to MinWords words is used.  Words of the chosen fragment
+ * get their in/selected/skip/replace flags set; startsel/stopsel and
+ * their lengths are stored in *prs, which is also the return value.
+ */
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults, may be overridden from opt (plus start and stop tags) */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* a better cover with a good end was already found, try the next cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Accept this candidate if it is the first one, or has more query
+        * words and a good end, or has a good end while the best so far
+        * has not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: take the beginning of the text */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen fragment */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   if (!prs->startsel)
+       prs->startsel=pstrdup("");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+                   dist = abs( post[l].pos - ct[p].pos );
+                   if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                       float curw; 
+                       if ( !dist ) dist=MAXENTRYPOS;  
+                       curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                       res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                   }
+               }
+           }
+       }
+   }
+   pfree(pos);
+   return res; 
+}
+
+/*
+ * calc_rank_or: rank of tsvector t against query q, each query word
+ * contributing independently.
+ *
+ * For every VAL item of the query found in t, each of its positions
+ * contributes wpos(); contributions are combined as
+ * 1 - (1-res)*(1-wpos).  Words stored without position data use the
+ * POSNULL placeholder position list.  Returns -1.0 when no query word
+ * occurs in t.
+ *
+ * NOTE(review): `w` is not referenced by name here; presumably the wpos()
+ * macro reads it -- confirm against its definition.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   float res=-1.0;
+   ITEM    *item=GETQUERY(q);
+
+   /* first element of POSNULL holds the number of placeholder positions */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+   for(i=0; i<q->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(t,q,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(t,entry);
+           post = POSDATAPTR(t,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       for(j=0;j<dimt;j++) {
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * calc_rank: rank tsvector t against query q and normalize the result.
+ *
+ * Uses calc_rank_and() when the first query item is the '&' operator and
+ * calc_rank_or() otherwise; a negative result from either is replaced by
+ * 1e-20.  Returns 0.0 when either the tsvector or the query is empty.
+ *
+ * method: 0 - no normalization, 1 - divide by log(document length),
+ * 2 - divide by document length (length as given by cnt_length()).
+ * Any other value raises an error.
+ *
+ * The division is skipped when the divisor is not positive: log(1) is 0,
+ * so a document of length 1 would otherwise get an infinite rank.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM *item = GETQUERY(q);
+   float res=0.0;
+   double norm;
+
+   if (!t->size || !q->size)
+       return 0.0;
+
+   res = ( item->type != VAL && item->val == (int4) '&' ) ?
+       calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+   if ( res < 0 )
+       res = 1e-20;
+
+   switch(method) {
+       case 0:
+           break;
+       case 1:
+           norm = log((float)cnt_length(t));
+           if ( norm > 0 )
+               res /= norm;
+           break;
+       case 2:
+           norm = (float)cnt_length(t);
+           if ( norm > 0 )
+               res /= norm;
+           break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   return res;
+}
+
+/*
+ * rank: rank with caller-supplied weights.
+ *
+ * Arguments: 0 - one-dimensional float4 array with at least
+ * lengthof(weights) elements, 1 - tsvector, 2 - query, 3 - optional
+ * normalization method (defaults to DEF_NORM_METHOD).
+ * A negative array element selects the built-in default from weights[];
+ * a value above 1.0 raises an error.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 ) 
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+       elog(ERROR,"Array of weight is too short");
+
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 ) 
+           elog(ERROR,"Weight out of range");
+   } 
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method); 
+       
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank_def: rank using the built-in weights[] table.
+ * Arguments: 0 - tsvector, 1 - query, 2 - optional normalization method
+ * (DEF_NORM_METHOD is used unless exactly three arguments are given).
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *qry = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int norm_method;
+   float result;
+
+   if ( PG_NARGS() == 3 )
+       norm_method = PG_GETARG_INT32(2);
+   else
+       norm_method = DEF_NORM_METHOD;
+
+   result = calc_rank(weights, vec, qry, norm_method);
+
+   PG_FREE_IF_COPY(vec, 0);
+   PG_FREE_IF_COPY(qry, 1);
+   PG_RETURN_FLOAT4(result);
+}
+
+
+typedef struct { /* one occurrence of a query item in a document, as built by get_docrep() */
+   ITEM    *item; /* the query item that occurs here */
+   int32   pos; /* position of the occurrence, copied from the tsvector position data */
+} DocRepresentation;
+
+/*
+ * compareDocR: qsort() comparator ordering DocRepresentation entries by
+ * ascending position.  Equal positions compare as 0, as qsort() requires
+ * (returning a non-zero value for equal keys makes the ordering
+ * inconsistent: both cmp(a,b) and cmp(b,a) would claim "greater").
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+typedef struct { /* window of DocRepresentation entries handed to checkcondition_DR() through TS_execute() */
+   DocRepresentation *doc; /* first entry of the window */
+   int len; /* number of entries in the window */
+}  ChkDocR;
+
+/*
+ * checkcondition_DR: TS_execute() callback.
+ * checkval points to a ChkDocR window; returns true when any entry of
+ * the window refers to the query item `val`.
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *window = (ChkDocR*)checkval;
+   int k;
+
+   for(k=0; k < window->len; k++) {
+       if ( window->doc[k].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Cover: find the next cover of the query in a document representation.
+ *
+ * doc/len is the array produced by get_docrep().  *pos is the index in
+ * doc to start from and is advanced for the next call.  On success returns
+ * true with *p and *q set to the first and last word position of the
+ * cover.  On entry *q must hold the right edge of the previous cover (0 on
+ * the first call); it is used to skip a window that was already examined.
+ *
+ * The first pass finds, for every VAL query item, its first occurrence
+ * at or after *pos and keeps the largest position in *q.  The second pass
+ * walks back from there and keeps the smallest position in *p.  The
+ * candidate window is then checked with TS_execute(); if it does not
+ * satisfy the query the search continues recursively.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   /* f: entry holding the left edge; initialised to a dummy non-NULL value */
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* pass 1: right edge */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               } 
+               break;
+           } 
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   } 
+
+   /* pass 2: left edge, scanning backwards from the right edge */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+ 
+   if ( *p<=*q ) {
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       /* the next search starts just after the left edge */
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { 
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ 
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q); 
+   }
+ 
+   return false;
+}
+
+/*
+ * get_docrep: list every occurrence of every query word in a tsvector.
+ *
+ * For each VAL item of the query found in txt, one DocRepresentation is
+ * emitted per stored position (words without position data use the
+ * POSNULL placeholder list).  The result is a palloc'd array sorted with
+ * compareDocR (by position); *doclen receives the number of entries.
+ * Returns NULL, with *doclen set to 0, when no query word occurs in txt.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   /* first element of POSNULL holds the number of placeholder positions */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until the new positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+   
+   if ( cur>0 ) {
+       if ( cur>1 ) 
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+   
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd: cover-based rank.
+ *
+ * Arguments: 0 - K (values <= 0 select the default 4), 1 - tsvector,
+ * 2 - query, 3 - optional normalization method (defaults to
+ * DEF_NORM_METHOD).  Every cover found by Cover() adds 1.0 when it spans
+ * at most K positions and K/(span) otherwise.  Returns 0 when no query
+ * word occurs in the document.
+ *
+ * method: 0 - none, 1 - divide by log(document length), 2 - divide by
+ * document length; other values raise an error.  The division is skipped
+ * when the divisor is not positive: log(1) is 0, so a document of length
+ * 1 would otherwise get an infinite rank.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   double  norm;
+   int p=0,q=0,len,cur;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;    
+   while( Cover(doc, len, query, &cur, &p, &q) ) 
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   switch(method) {
+       case 0:
+           break;
+       case 1:
+           norm = log((float)cnt_length(txt));
+           if ( norm > 0 )
+               res /= norm;
+           break;
+       case 2:
+           norm = (float)cnt_length(txt);
+           if ( norm > 0 )
+               res /= norm;
+           break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd_def: rank_cd() with the default K.
+ * Passes -1 as K, forwards arguments 0 and 1, and forwards argument 2 as
+ * the normalization method when exactly three arguments are given
+ * (DEF_NORM_METHOD otherwise).
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum norm_method;
+   Datum result;
+
+   if ( PG_NARGS() == 3 )
+       norm_method = PG_GETARG_DATUM(2);
+   else
+       norm_method = Int32GetDatum(DEF_NORM_METHOD);
+
+   result = DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       norm_method
+   );
+
+   PG_RETURN_DATUM( result );
+}
+
+/**************debug*************/
+
+typedef struct { /* one word occurrence of a document, used by get_covers() to print covers */
+   char    *w; /* pointer to the word text inside the tsvector (not NUL-terminated; see len) */
+   int2    len; /* length of the word text */
+   int2    pos; /* position of this occurrence */
+   int2    start; /* non-zero: number of the cover that starts at this word */
+   int2    finish; /* non-zero: number of the cover that ends at this word */
+} DocWord;
+
+/*
+ * compareDocWord: qsort() comparator ordering DocWord entries by
+ * ascending position.  Equal positions compare as 0, as qsort() requires
+ * (returning a non-zero value for equal keys makes the ordering
+ * inconsistent).
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers: debugging aid that prints a document with its covers marked.
+ *
+ * Arguments: 0 - tsvector (every word must carry position data, otherwise
+ * an error is raised), 1 - query.  Returns a text value listing the
+ * words in position order, each followed by a space; the first word of
+ * cover number N is preceded by "{N " and its last word is followed by
+ * "}N ".  Returns an empty text when no query word occurs in the document.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count word occurrences in the whole document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark the first and last word of every cover */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* test the bound before dereferencing dwptr */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include <float.h>
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+typedef struct NODE /* node of the query tree built by maketree() */
+{
+   struct NODE *left; /* left operand; NULL for values and for the '!' operator */
+   struct NODE *right; /* right operand (the only operand of '!'); NULL for values */
+   ITEM       *valnode; /* the query item this node was built from (not copied) */
+}  NODE;
+
+/*
+ * make query tree from plain view of query
+ */
+/*
+ * Build a NODE tree from the flat (array) form of a query.
+ * For an operator item the right operand is the item that follows it and,
+ * unless the operator is '!', the left operand is in->left items further on.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *n = (NODE *) palloc(sizeof(NODE));
+
+   n->valnode = in;
+   n->left = NULL;
+   n->right = NULL;
+
+   /* anything but an operator is a leaf */
+   if (in->type != OPR)
+       return n;
+
+   n->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       n->left = maketree(in + in->left);
+
+   return n;
+}
+
+typedef struct /* output buffer used while flattening a NODE tree (see plainnode) */
+{
+   ITEM       *ptr; /* growable array of items */
+   int4        len; /* allocated number of items */
+   int4        cur; /* number of items written so far */
+}  PLAINTREE;
+
+/*
+ * Append `node` and its subtree to the flat array in `state`, freeing the
+ * node itself.  The node's item is written first, then the right subtree,
+ * then (for binary operators) the left subtree; the item's `left` field is
+ * set to the distance from the operator to its left operand (1 for '!').
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   int4        self;
+
+   /* double the buffer when it is full */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+
+   /*
+    * Remember the slot by index, not by pointer: the recursive calls
+    * below may repalloc state->ptr.
+    */
+   self = state->cur;
+   memcpy((void *) &(state->ptr[self]), (void *) node->valnode, sizeof(ITEM));
+   state->cur++;
+
+   if (node->valnode->type != VAL)
+   {
+       if (node->valnode->val == (int4) '!')
+       {
+           state->ptr[self].left = 1;
+           plainnode(state, node->right);
+       }
+       else
+       {
+           plainnode(state, node->right);
+           state->ptr[self].left = state->cur - self;
+           plainnode(state, node->left);
+       }
+   }
+
+   pfree(node);
+}
+
+/*
+ * make plain view of tree from 'normal' view of tree
+ */
+/*
+ * Flatten a NODE tree back into the array form of a query.
+ * Returns a palloc'd ITEM array and stores the item count in *len.
+ * When root is NULL, or its item is neither VAL nor OPR, returns NULL
+ * and stores 0.  The tree nodes are freed by plainnode().
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   flat;
+
+   flat.ptr = NULL;
+   flat.len = 16;
+   flat.cur = 0;
+
+   if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       flat.ptr = (ITEM *) palloc(flat.len * sizeof(ITEM));
+       plainnode(&flat, root);
+   }
+
+   *len = flat.cur;
+   return flat.ptr;
+}
+
+/*
+ * Free a NODE tree recursively; a NULL tree is ignored.
+ * Only the nodes are freed, not the items they point to.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * clean tree for ! operator.
+ * It's usefull for debug, but in
+ * other case, such view is used with search in index.
+ * Operator ! always return TRUE
+ */
+/*
+ * Remove '!' subtrees from a query tree.
+ *
+ *  - a value is kept as is;
+ *  - a '!' node is dropped together with its operand;
+ *  - an '|' node is dropped entirely if either operand is dropped;
+ *  - any other operator (i.e. '&') is replaced by its surviving operand
+ *    when one operand is dropped, and dropped when both are.
+ *
+ * Returns the cleaned tree, or NULL when nothing is left.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   NODE       *survivor;
+
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   if (node->valnode->val == (int4) '|')
+   {
+       /* the right side is cleaned only if the left side survived */
+       node->left = clean_NOT_intree(node->left);
+       if (node->left != NULL)
+       {
+           node->right = clean_NOT_intree(node->right);
+           if (node->right != NULL)
+               return node;
+       }
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & */
+   node->left = clean_NOT_intree(node->left);
+   node->right = clean_NOT_intree(node->right);
+   if (node->left != NULL && node->right != NULL)
+       return node;
+
+   /* at most one side is left (possibly none): it replaces this node */
+   survivor = (node->left != NULL) ? node->left : node->right;
+   pfree(node);
+   return survivor;
+}
+
+/*
+ * Return a copy of the flat query `ptr` with the '!' subtrees removed
+ * (see clean_NOT_intree).  *len receives the number of items in the
+ * result; the result is NULL when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree;
+
+   tree = maketree(ptr);
+   tree = clean_NOT_intree(tree);
+
+   return plaintree(tree, len);
+}
+
+#define V_UNKNOWN  0 /* clean_fakeval_intree(): subtree was kept, its value is not known in advance */
+#define V_TRUE     1 /* subtree was removed and always evaluates to true */
+#define V_FALSE        2 /* subtree was removed and always evaluates to false */
+
+/*
+ * Clean query tree from values which is always in
+ * text (stopword)
+ */
+/*
+ * Remove VALTRUE items (values that are always true) from a query tree
+ * and simplify the operators above them.
+ *
+ * Returns the simplified subtree.  When the whole subtree reduces to a
+ * constant, it is freed, NULL is returned and *result is set to V_TRUE or
+ * V_FALSE; otherwise *result is left untouched.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+   char        dominant,
+               neutral;
+   NODE       *res;
+
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* negation of a constant */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+       return node;
+   }
+
+   /*
+    * Binary operator.  The "dominant" constant decides the result on its
+    * own (true for '|', false for anything else, i.e. '&'); the "neutral"
+    * constant leaves the result to the other operand.
+    */
+   if (node->valnode->val == (int4) '|')
+   {
+       dominant = V_TRUE;
+       neutral = V_FALSE;
+   }
+   else
+   {
+       dominant = V_FALSE;
+       neutral = V_TRUE;
+   }
+
+   node->left = clean_fakeval_intree(node->left, &lresult);
+   node->right = clean_fakeval_intree(node->right, &rresult);
+
+   if (lresult == dominant || rresult == dominant)
+   {
+       freetree(node);
+       *result = dominant;
+       return NULL;
+   }
+
+   if (lresult == neutral && rresult == neutral)
+   {
+       freetree(node);
+       *result = neutral;
+       return NULL;
+   }
+
+   if (lresult == neutral)
+   {
+       /* left side vanished: the right side replaces this node */
+       res = node->right;
+       pfree(node);
+       return res;
+   }
+
+   if (rresult == neutral)
+   {
+       /* right side vanished: the left side replaces this node */
+       res = node->left;
+       pfree(node);
+       return res;
+   }
+
+   return node;
+}
+
+/*
+ * Return a copy of the flat query `ptr` with the VALTRUE items removed
+ * (see clean_fakeval_intree).  When the whole query reduces to a
+ * constant, a NOTICE is issued, *len is set to 0 and NULL is returned.
+ * Otherwise *len receives the number of items in the result.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        outcome = V_UNKNOWN;
+   NODE       *cleaned;
+
+   cleaned = clean_fakeval_intree(maketree(ptr), &outcome);
+
+   if (outcome == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/*
+ * qsort()/bsearch() comparator: orders SNMapEntry items by key (strcmp).
+ */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *lhs = (const SNMapEntry*)a;
+   const SNMapEntry *rhs = (const SNMapEntry*)b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add a key/value pair to the map.
+ * The key is copied with strdup(); storage comes from malloc/realloc and
+ * is released by freeSNMap().  The list starts at 16 entries and doubles
+ * when full, and is re-sorted after every insertion so that findSNMap()
+ * can use bsearch().  Raises an error when memory runs out.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if (map->len>=map->reallen) {
+       int newlen = (map->reallen) ? 2*map->reallen : 16;
+       SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newlen);
+
+       if ( !grown )
+           elog(ERROR, "No memory");
+       map->list=grown;
+       map->reallen=newlen;
+   }
+
+   slot = &( map->list[ map->len ] );
+   slot->key = strdup(key);
+   if ( ! slot->key )
+       elog(ERROR, "No memory");
+   slot->value=value;
+   map->len++;
+
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Same as addSNMap(), with the key given as a text value.
+ * The key is converted with text2char() and the temporary string is
+ * freed afterwards.
+ */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey = text2char( key );
+
+   addSNMap(map, ckey, value);
+
+   pfree(ckey);
+}
+
+/*
+ * Look up `key` in the map.
+ * Returns the stored value, or 0 when the map is empty or the key is
+ * not present.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if ( map->len==0 || !map->list )
+       return 0;   
+
+   probe.key = key;
+   probe.value = 0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( !hit )
+       return 0;
+
+   return hit->value;
+}
+
+/*
+ * Same as findSNMap(), with the key given as a text value.
+ * The key is converted with text2char() and the temporary string is
+ * freed afterwards.  The result is kept in an Oid rather than a signed
+ * int, so the value is not passed through a signed type on its way back.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release everything owned by the map (all key strings and the list
+ * itself) and reset the structure to zeroes.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *cursor;
+
+       for( cursor=map->list; map->len; cursor++, map->len-- ) {
+           if ( cursor->key )
+               free(cursor->key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+typedef struct { /* one entry of an SNMap */
+   char    *key; /* string key */
+   Oid value; /* Oid stored under the key */
+} SNMapEntry;
+
+typedef struct { /* map from string to Oid; see the add/find/free functions declared below */
+   int len; /* number of entries in use -- presumably; confirm in snmap.c */
+   int reallen; /* allocated number of entries -- presumably; confirm in snmap.c */
+   SNMapEntry  *list; /* array of entries */
+} SNMap;
+
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/* Allocate and initialise a stemmer environment with S_size string
+   variables, I_size integer variables and B_size boolean variables.
+
+   Returns the new environment, or NULL when one of the calloc() calls in
+   this function fails; in that case everything allocated so far is
+   released with SN_close_env(). Each *_size field is set only after its
+   array has been allocated, so SN_close_env() never touches an array that
+   does not exist.
+*/
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    SN_close_env(z);
+    return NULL;
+}
+
+/* Release a stemmer environment created by SN_create_env(): the string
+   variables (each with lose_s), the three variable arrays, the current
+   string z->p, and finally the environment itself. */
+extern void SN_close_env(struct SN_env * z)
+{
+    if (z->S_size)
+    {
+        int k = 0;
+        while (k < z->S_size)
+        {
+            lose_s(z->S[k]);
+            k++;
+        }
+        free(z->S);
+    }
+    if (z->I_size)
+        free(z->I);
+    if (z->B_size)
+        free(z->B);
+    if (z->p)
+        lose_s(z->p);
+    free(z);
+}
+
+/* Make the `size` symbols at `s` the current string of the environment:
+   the range 0..z->l of the current string is replaced through
+   replace_s(), and the cursor z->c is reset to 0. */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    const int from = 0;
+
+    replace_s(z, from, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+struct SN_env { /* state of one stemmer instance; created by SN_create_env(), freed by SN_close_env() */
+    symbol * p; /* the string being worked on */
+    int c; int a; int l; int lb; int bra; int ket; /* c: cursor, l: end of string -- presumably; the others look like further markers into p, confirm with the Snowball runtime */
+    int S_size; int I_size; int B_size; /* element counts of the S, I and B arrays */
+    symbol * * S; /* string variables */
+    int * I; /* integer variables */
+    symbol * B; /* presumably boolean variables -- confirm with the Snowball runtime */
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point, followed by forward declarations of the file-local
+   routines it is built from (one per routine of the Snowball source). */
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+/* Environment constructor/destructor for this stemmer (defined at the end
+   of the file). */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/* Among table a_0: the single string "gener", searched forwards by
+   r_mark_regions() through find_among().  Entry layout is that of
+   struct among: { length, string, substring index, result, function }. */
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+/* Among table a_1: endings examined by r_Step_1a() through find_among_b().
+   The result field selects the switch case there (1: "sses", 2: "ied" and
+   "ies", 3: "s"); entries with result -1 ("ss", "us") have no case and so
+   cause no edit. */
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+/* Among table a_2: endings tested by r_Step_1b() after it has deleted a
+   suffix.  Entry 0 is the empty string (length 0, no string pointer) with
+   result 3; the other entries name it as their substring.  Result 1
+   ("bl", "at", "iz") and result 2 (the doubled consonants) select the
+   matching inner switch cases in r_Step_1b(). */
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+/* Among table a_3: suffixes matched first by r_Step_1b().  Result 1
+   ("eed", "eedly") and result 2 ("ed", "ing", "edly", "ingly") select the
+   outer switch cases there. */
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+/* Among table a_4: suffixes handled by r_Step_2().  Each result value
+   (1..16) is the switch case in r_Step_2() that rewrites or deletes the
+   matched suffix. */
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+/* Among table a_5: suffixes handled by r_Step_3(); the result value
+   (1..6) is the switch case applied there. */
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+/* Among table a_6: suffixes handled by r_Step_4().  Result 1 entries are
+   simply deleted there; result 2 ("ion") is deleted only after an extra
+   test on the preceding character. */
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+/* Among table a_7: the final letters "e" (result 1) and "l" (result 2)
+   examined by r_Step_5(). */
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+/* Among table a_8: words tested by r_exception2() through find_among_b().
+   All results are -1; r_exception2() only cares whether one of them
+   matched. */
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+/* Among table a_9: whole words handled by r_exception1() through
+   find_among().  A result of 1..11 selects the replacement made in
+   r_exception1(); a result of -1 has no switch case there, so the word is
+   accepted without being changed. */
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+/* Character-class tables handed to in_grouping()/out_grouping() and their
+   backward variants, always together with a (min, max) character range:
+   g_v with (97, 121), g_v_WXY with (89, 121), g_valid_LI with (99, 116).
+   NOTE(review): presumably bitmaps with one bit per character counted from
+   `min` -- the grouping routines are not in this file; confirm. */
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+/* Literal strings used by the routines below, either as text to compare
+   against (eq_s, eq_s_b) or as replacement/insertion text (slice_from_s,
+   insert_s).  They carry no terminator: every call site passes the length
+   explicitly. */
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/* prelude: clear the Y_found flag (B[0]), then rewrite selected 'y'
+   characters as 'Y', setting Y_found whenever a rewrite is made.
+   First block: a 'y' at the cursor that also passes the g_v test after it.
+   Second block: repeatedly search forwards for a g_v match followed by
+   'y' and rewrite that 'y'.
+   The cursor is restored after each block; the routine always returns 1.
+   NOTE(review): relies on eq_s()/in_grouping() moving z->c on success --
+   they are defined outside this file; confirm. */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                /* no match here: give up at the limit, else try one further on */
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1:
+        z->c = c;
+    }
+    return 1;
+}
+
+/* mark_regions: compute the two region marks I[0] (p1) and I[1] (p2).
+   Both start out at the limit z->l.  p1 is the cursor position after
+   either a match in table a_0 at the cursor or, failing that, after
+   scanning past an in_grouping(g_v) match and then an out_grouping(g_v)
+   match; p2 is the position after one more such pair of scans.  If the
+   limit is reached during any scan, the marks not yet set keep the value
+   z->l.  The cursor is restored before returning; always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+/* shortv: backward test used by r_Step_1b() and r_Step_5().  Returns 1
+   when, reading backwards from the cursor, either
+     - out_grouping_b(g_v_WXY), in_grouping_b(g_v), out_grouping_b(g_v)
+       succeed in that order, or
+     - out_grouping_b(g_v) and in_grouping_b(g_v) succeed and the cursor
+       has then reached the backward limit z->lb;
+   returns 0 otherwise.  The cursor is not restored here; callers that
+   need it unchanged save and restore it themselves. */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m; /* first alternative failed: rewind and try the second */
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* R1 test: yields 1 when the cursor lies at or beyond mark I[0] (p1, set
+   by r_mark_regions), and 0 otherwise. */
+static int r_R1(struct SN_env * z) {
+    return (z->c >= z->I[0]) ? 1 : 0;
+}
+
+/* R2 test: yields 1 when the cursor lies at or beyond mark I[1] (p2, set
+   by r_mark_regions), and 0 otherwise. */
+static int r_R2(struct SN_env * z) {
+    return (z->c >= z->I[1]) ? 1 : 0;
+}
+
+/* Step_1a: match an ending from table a_1 backwards from the cursor and
+   edit the slice [bra, ket) accordingly:
+     case 1 ("sses")        -> replaced by "ss";
+     case 2 ("ied", "ies")  -> replaced by "ie" when stepping back one
+                               character from the match lands on the
+                               backward limit, otherwise by "i";
+     case 3 ("s")           -> deleted, provided that after stepping back
+                               one character a g_v character is found
+                               further back.
+   Returns 0 when nothing matched or a condition failed, 1 otherwise
+   (including the no-op matches whose result is -1). */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+/* Step_1b: match a suffix from table a_3 backwards from the cursor.
+     case 1 ("eed", "eedly"): replaced by "ee" if the R1 test passes.
+     case 2 ("ed", "ing", "edly", "ingly"): deleted if a g_v character
+       occurs somewhere before it; the remaining ending is then looked up
+       in table a_2 and
+         result 1 -> "e" is inserted at the cursor,
+         result 2 -> the last character is deleted,
+         result 3 -> "e" is inserted when the cursor is at mark I[0] and
+                     r_shortv() succeeds.
+   Returns 0 when no suffix matched or a condition failed, 1 otherwise. */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/* Step_1c: replace a final 'y' or 'Y' by 'i'.  The replacement is made
+   only when the character before it passes out_grouping_b(g_v) and the
+   cursor has not reached the backward limit after that test.
+   Returns 1 after replacing, 0 when any condition fails. */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/* Step_2: match a suffix from table a_4 backwards from the cursor and,
+   if the R1 test passes, rewrite it with the replacement chosen by the
+   table's result value (cases 1..15) or delete it (case 16).
+   Case 13 additionally requires an 'l' before the suffix, and case 16 a
+   character accepted by the g_valid_LI grouping.
+   Returns 0 when nothing matched or a condition failed, 1 otherwise. */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+/* Step_3: match a suffix from table a_5 backwards from the cursor and,
+   if the R1 test passes, replace it (cases 1..4) or delete it (case 5;
+   case 6 only when the R2 test also passes).
+   Returns 0 when nothing matched or a condition failed, 1 otherwise. */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+
+/* Step_4: match a suffix from table a_6 backwards from the cursor and,
+   if the R2 test passes, delete it.  For case 2 ("ion") the deletion is
+   made only when the suffix is preceded by 's' or 't'.
+   Returns 0 when nothing matched or a condition failed, 1 otherwise. */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+/* Step_5: handle a final "e" or "l" (table a_7).
+     case 1 ("e"): deleted when the R2 test passes, or when the R1 test
+                   passes and r_shortv() fails.
+     case 2 ("l"): deleted when the R2 test passes and another 'l'
+                   precedes it.
+   Returns 0 when nothing matched or a condition failed, 1 otherwise. */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/* exception2: returns 1 when an entry of table a_8 matches backwards from
+   the cursor and the match reaches the backward limit z->lb; returns 0
+   otherwise.  english_stem() skips Step_1b .. Step_5 when this succeeds. */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (!(find_among_b(z, a_8, 8))) return 0; /* substring, line 147 */
+    z->bra = z->c; /* ], line 147 */
+    if (z->c > z->lb) return 0; /* atlimit, line 147 */
+    return 1;
+}
+
+/* exception1: handle the special-case words of table a_9.  Succeeds only
+   when a table entry matches forwards from the cursor and the match ends
+   exactly at the limit z->l.  Results 1..11 replace the matched slice by
+   the corresponding fixed string; entries with result -1 have no switch
+   case and leave the word as it is.
+   Returns 1 on success, 0 when there is no such match. */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+
+/* postlude: undo the marking done by r_prelude().  If the Y_found flag
+   (B[0]) is clear, returns 0 at once.  Otherwise it repeatedly searches
+   forwards for a 'Y' and replaces it by 'y' until the limit is reached,
+   restores the cursor saved at the start of the last pass, and
+   returns 1. */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            /* no 'Y' here: stop at the limit, else look one position on */
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+/* Stem the word currently held in z (loaded with SN_set_current()).
+
+   Order of work:
+     1. r_exception1(): if the whole word is a listed special case, it is
+        handled there and nothing else runs.
+     2. Otherwise, if fewer than 3 symbols lie between the cursor and the
+        limit, return 0 without touching the word.
+     3. r_prelude() and r_mark_regions(), each with the cursor restored
+        afterwards.
+     4. Switch to backward processing (lb = cursor, cursor = limit) and run
+        r_Step_1a(); then, unless r_exception2() matches, r_Step_1b(),
+        r_Step_1c(), r_Step_2() .. r_Step_5().  A failing step is simply
+        skipped; the cursor is restored after each one.
+     5. Put the cursor back at lb and run r_postlude().
+
+   Returns 1 except for the early exit in step 2. */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb; /* back to forward processing from the saved start */
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/* Create the environment for this stemmer: no string variables, two
+   integers (I[0] and I[1], the p1/p2 marks) and one boolean (B[0],
+   Y_found). */
+extern struct SN_env * english_create_env(void) { return SN_create_env(0, 2, 1); }
+
+/* Release an environment obtained from english_create_env(). */
+extern void english_close_env(struct SN_env * z) { SN_close_env(z); }
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create / release the working environment used by english_stem(). */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/* Stem the word currently loaded in z; the result is left in z. */
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>   /* INT_MAX, INT_MIN used by MAXINT / MININT below */
+
+#include "api.h"
+
+/* Largest and smallest int values. */
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Size in bytes of two ints; the SIZE and CAPACITY accessors below read
+   the two ints located immediately before a symbol buffer.  Parenthesized
+   so that the macro can be used safely inside larger expressions. */
+#define HEAD (2*sizeof(int))
+
+/* Accessors for the ints stored in front of symbol buffer p:
+   p[-1] (as int) is its size, p[-2] (as int) its capacity. */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) ((int *)(p))[-1] = (n)
+#define CAPACITY(p)    ((int *)(p))[-2]
+
+/* One entry of a string table searched by find_among()/find_among_b().
+   The generated stemmers define arrays of these (a_0, a_1, ...). */
+struct among
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    int (* function)(struct SN_env *); /* optional routine tied to the entry; 0 when there is none */
+};
+
+/* Runtime support routines called by the generated stemmers.  Their
+   definitions are not part of this header; the group notes below are based
+   on the names and on how english_stem.c calls them.
+   NOTE(review): confirm details against the implementation file. */
+
+/* Allocate / release a symbol buffer. */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+/* Character-class tests against a grouping table `s` covering the
+   character range min..max; the _b variants work backwards. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+/* Character-range tests; the _b variants work backwards. */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+/* String comparison at the cursor against s_size symbols at `s` (eq_s)
+   or against a symbol buffer `p` (eq_v); the _b variants work backwards. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+/* Table lookup over v_size entries of `v`; returns 0 when nothing matches,
+   otherwise the `result` of the matching entry. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+/* Buffer editing: grow a buffer, replace a range, and replace or delete
+   the current slice (bra..ket). */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+/* Insert text over the range bra..ket. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+/* Copy the current slice / the whole word into buffer p. */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+/* Diagnostic output. */
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point of the stemmer. */
+extern int russian_stem(struct SN_env * z);
+/* File-local routines, one per Snowball routine; each returns 1 when it
+   applied (signal t) and 0 otherwise. */
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Creation and disposal of the environment the stemmer runs in. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Suffix table searched by r_perfective_gerund() through find_among_b().
+   Each among entry is { s_size, s, substring_i, result, function }:
+   substring_i is the index of the longest entry that is a suffix of this one
+   (-1 if none), result selects the switch case in the caller, and a zero
+   function means no extra condition.  The symbols are raw 8-bit character
+   codes (presumably KOI8-R Cyrillic -- confirm). */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Suffix table searched by r_adjective() through find_among_b().  Every
+   entry is { s_size, s, substring_i, result, function }; all entries here
+   carry result 1 and no substring link. */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Suffix table searched by r_adjectival() (after r_adjective() succeeded)
+   through find_among_b().  Entries are
+   { s_size, s, substring_i, result, function }; result 1 entries need a
+   further preceding-symbol check in the caller, result 2 entries are deleted
+   unconditionally. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* Two-entry suffix table searched by r_reflexive() through find_among_b();
+   entries are { s_size, s, substring_i, result, function }. */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Suffix table searched by r_verb() through find_among_b().  Entries are
+   { s_size, s, substring_i, result, function }; result 1 entries need a
+   further preceding-symbol check in r_verb(), result 2 entries are deleted
+   unconditionally. */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Suffix table searched by r_noun() through find_among_b().  Entries are
+   { s_size, s, substring_i, result, function }; every entry has result 1,
+   i.e. the matched ending is simply deleted. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* Two-entry suffix table searched by r_derivational() through
+   find_among_b(); entries are { s_size, s, substring_i, result, function }. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Suffix table searched by r_tidy_up() through find_among_b(); entries are
+   { s_size, s, substring_i, result, function }.  The result (1, 2 or 3)
+   picks the corresponding case in r_tidy_up(). */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Grouping bitmap passed to in_grouping()/out_grouping() with min 192 and
+   max 220: bit (ch - 192) is set for each member.  The set bits correspond
+   to the codes 192, 193, 197, 201, 207, 209, 213, 217 and 220 (the "v"
+   grouping of the Snowball source; presumably the vowels -- confirm). */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* One-symbol literals compared with eq_s_b() by the routines below:
+   s_0/s_1 in r_perfective_gerund(), s_2/s_3 in r_adjectival(), s_4/s_5 in
+   r_verb(), s_6..s_8 in r_tidy_up() and s_9 in russian_stem().  The generator
+   emits one array per use, hence the repeated values. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the two region marks kept in z->I[0] (pV in the Snowball source)
+   and z->I[1] (p2).  Both start out at the end of the text (z->l) and keep
+   that value if the corresponding scan runs into the limit.  The cursor is
+   restored before returning; the routine always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 100 */
+        /* Each loop below advances one symbol at a time until the grouping
+           test succeeds (which also consumes the matching symbol), or jumps
+           to lab0 when the limit is reached first. */
+        while(1) { /* gopast, line 101 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+            break;
+        lab1:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        /* pV: position just after the first symbol that is in g_v */
+        z->I[0] = z->c; /* setmark pV, line 101 */
+        while(1) { /* gopast, line 101 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+            break;
+        lab2:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+            break;
+        lab3:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+            break;
+        lab4:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        /* p2: reached after one more non-member, member, non-member scan */
+        z->I[1] = z->c; /* setmark p2, line 102 */
+    lab0:
+        z->c = c; /* "do" never moves the cursor */
+    }
+    return 1;
+}
+
+/* Region test: succeeds (1) when the cursor is at or beyond the p2 mark
+   stored in z->I[1], fails (0) otherwise.  Has no side effects. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/* Remove an ending listed in a_0 from the text before the cursor.
+   Entries with result 1 are removed only when preceded by s_0 (193) or
+   s_1 (209); entries with result 2 are removed unconditionally.
+   Returns 1 if an ending was deleted, 0 otherwise. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 111 */
+    among_var = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 111 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            /* try s_0 first; on failure rewind and try s_1 */
+            {   int m = z->l - z->c; /* or, line 115 */
+                if (!(eq_s_b(z, 1, s_0))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_1))) return 0;
+            }
+        lab0:
+            /* deletes bra..ket only; the preceding symbol just tested stays */
+            slice_del(z); /* delete, line 115 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 122 */
+            break;
+    }
+    return 1;
+}
+
+/* Delete an ending listed in a_1 from the text before the cursor.
+   Returns 1 when an ending matched (it is then removed), 0 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c;                      /* [ : slice ends at the cursor */
+    hit = find_among_b(z, a_1, 26);
+    if (hit == 0)
+        return 0;
+    z->bra = z->c;                      /* ] : slice starts where the match began */
+
+    /* every entry of a_1 has result 1 */
+    if (hit == 1)
+        slice_del(z);
+    return 1;
+}
+
+/* Remove an adjective ending (r_adjective) and then, optionally, one further
+   ending from a_2 in front of it.  Returns 0 only when r_adjective() fails;
+   the second step is a "try" and never makes the routine fail. */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m; goto lab0; }
+            case 1:
+                /* result 1: delete only if preceded by s_2 (193) or s_3 (209) */
+                /* NOTE(review): this inner m shadows the m of the enclosing
+                   "try", so the rewind on the failure path below uses the
+                   inner value (the position after the a_2 match). */
+                {   int m = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m;
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Delete one of the two endings listed in a_3 from the text before the
+   cursor.  Returns 1 when an ending matched (and was removed), else 0. */
+static int r_reflexive(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c;                      /* [ */
+    hit = find_among_b(z, a_3, 2);
+    if (hit == 0)
+        return 0;
+    z->bra = z->c;                      /* ] */
+
+    /* both entries of a_3 have result 1 */
+    if (hit == 1)
+        slice_del(z);
+    return 1;
+}
+
+/* Remove an ending listed in a_4 from the text before the cursor.
+   Entries with result 1 are removed only when preceded by s_4 (193) or
+   s_5 (209); entries with result 2 are removed unconditionally.
+   Returns 1 if an ending was deleted, 0 otherwise. */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 176 */
+    among_var = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 176 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            /* try s_4 first; on failure rewind and try s_5 */
+            {   int m = z->l - z->c; /* or, line 182 */
+                if (!(eq_s_b(z, 1, s_4))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_5))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 182 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 190 */
+            break;
+    }
+    return 1;
+}
+
+/* Delete an ending listed in a_5 from the text before the cursor.
+   Returns 1 when an ending matched (and was removed), else 0. */
+static int r_noun(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c;                      /* [ */
+    hit = find_among_b(z, a_5, 36);
+    if (hit == 0)
+        return 0;
+    z->bra = z->c;                      /* ] */
+
+    /* every entry of a_5 has result 1 */
+    if (hit == 1)
+        slice_del(z);
+    return 1;
+}
+
+/* Delete an ending listed in a_6, provided the ending starts at or after the
+   p2 mark (checked by r_R2 with the cursor at the start of the match).
+   Returns 1 when the ending was removed, 0 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int hit;
+
+    z->ket = z->c;                      /* [ */
+    hit = find_among_b(z, a_6, 2);
+    if (hit == 0)
+        return 0;
+    z->bra = z->c;                      /* ] */
+
+    if (!r_R2(z))                       /* the match must lie inside R2 */
+        return 0;
+
+    /* both entries of a_6 have result 1 */
+    if (hit == 1)
+        slice_del(z);
+    return 1;
+}
+
+/* Final clean-up driven by table a_7.  Depending on the result of the match:
+     1 - delete the matched ending, then require two s_6/s_7 symbols (206)
+         before it and delete the one nearest the end;
+     2 - the match is a single 206; delete it only if another 206 precedes it;
+     3 - delete the matched symbol (216) unconditionally.
+   Returns 1 when the selected case ran to completion, 0 otherwise. */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 223 */
+    among_var = find_among_b(z, a_7, 4); /* substring, line 223 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 223 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 227 */
+            /* new slice: the single symbol s_6 just before the deleted ending */
+            z->ket = z->c; /* [, line 228 */
+            if (!(eq_s_b(z, 1, s_6))) return 0;
+            z->bra = z->c; /* ], line 228 */
+            /* s_7 is only tested, it is outside the slice and is kept */
+            if (!(eq_s_b(z, 1, s_7))) return 0;
+            slice_del(z); /* delete, line 228 */
+            break;
+        case 2:
+            /* bra/ket still delimit the matched 206; s_8 is the one before it */
+            if (!(eq_s_b(z, 1, s_8))) return 0;
+            slice_del(z); /* delete, line 231 */
+            break;
+        case 3:
+            slice_del(z); /* delete, line 233 */
+            break;
+    }
+    return 1;
+}
+
+/* Stem the word held in z->p in place.
+   Steps: compute the region marks, then, working backwards from the end of
+   the word and never looking before the pV mark (z->I[0]):
+     1. remove a perfective gerund ending, or else optionally a reflexive
+        ending followed by an adjectival, verb or noun ending;
+     2. optionally remove a trailing s_9 (201);
+     3. remove a derivational ending;
+     4. tidy up.
+   Returns 0 if the cursor lies before pV when the limit is set, else 1. */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    /* switch to backward mode: the old cursor becomes the backward limit */
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        /* raise the backward limit to pV for the duration of this block;
+           the previous limit is kept in m3 and restored at the end */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                /* first of adjectival / verb / noun that applies wins */
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            /* end of "do": cursor goes back to its distance from the end */
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3;
+    }
+    /* leave backward mode with the cursor at the backward limit */
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create the environment used by russian_stem().  The arguments are passed
+   straight to SN_create_env(); presumably the second one is the number of
+   integer slots (this file uses z->I[0] and z->I[1]) -- confirm in api.c. */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Dispose of an environment obtained from russian_create_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create / destroy the environment the Russian stemmer works on. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Stem the word currently held in z in place. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* unless(C) stmt;  runs stmt when C is false */
+#define unless(C) if(!(C))
+
+/* capacity (in symbols) given to a buffer made by create_s() */
+#define CREATE_SIZE 1
+
+/* Allocate a fresh symbol buffer of capacity CREATE_SIZE.
+ *
+ * The allocation is HEAD bytes larger than the symbol storage; the returned
+ * pointer points just past that HEAD-byte bookkeeping area, which is filled
+ * in through the CAPACITY() and SET_SIZE() macros.  Release the buffer with
+ * lose_s(), not free().
+ *
+ * Returns the new buffer, or 0 when malloc() fails.  (Previously a failed
+ * allocation was offset by HEAD and written through, which is undefined
+ * behaviour.)
+ */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == 0) return 0;     /* out of memory: report it to the caller */
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Release a buffer obtained from create_s() or increase_size().
+ * The real allocation starts HEAD bytes before p.  A null p is ignored:
+ * stepping back from a null pointer and freeing the result would be
+ * undefined behaviour.
+ */
+extern void lose_s(symbol * p)
+{   if (p == 0) return;
+    free((char *) p - HEAD);
+}
+
+/* Forward test: if the symbol at the cursor lies in min..max and its bit is
+   set in the bitmap s, step over it and return 1; otherwise return 0 and
+   leave the cursor alone.  Also returns 0 at the forward limit z->l. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max || ch < min) return 0;
+    ch -= min;                              /* bit index inside the bitmap */
+    if (!(s[ch >> 3] & (0X1 << (ch & 0X7)))) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: if the symbol before the cursor lies in min..max and its
+   bit is set in the bitmap s, step back over it and return 1; otherwise
+   return 0 and leave the cursor alone.  Also returns 0 at the backward
+   limit z->lb. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max || ch < min) return 0;
+    ch -= min;                              /* bit index inside the bitmap */
+    if (!(s[ch >> 3] & (0X1 << (ch & 0X7)))) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: if the symbol at the cursor is NOT a member of the grouping
+   (outside min..max, or its bit in s is clear), step over it and return 1;
+   return 0 for a member or at the forward limit z->l. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max && ch >= min)
+    {   int bit = ch - min;
+        if (s[bit >> 3] & (0X1 << (bit & 0X7))) return 0;   /* it is a member */
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward test: if the symbol before the cursor is NOT a member of the
+   grouping (outside min..max, or its bit in s is clear), step back over it
+   and return 1; return 0 for a member or at the backward limit z->lb. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max && ch >= min)
+    {   int bit = ch - min;
+        if (s[bit >> 3] & (0X1 << (bit & 0X7))) return 0;   /* it is a member */
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Step forward over the symbol at the cursor if it lies in min..max
+   (inclusive).  Returns 1 on success, 0 otherwise or at the limit z->l. */
+extern int in_range(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch < min || ch > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Step back over the symbol before the cursor if it lies in min..max
+   (inclusive).  Returns 1 on success, 0 otherwise or at the limit z->lb. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch < min || ch > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Step forward over the symbol at the cursor if it lies outside min..max.
+   Returns 1 on success, 0 if it is inside the range or at the limit z->l. */
+extern int out_range(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (min <= ch && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Step back over the symbol before the cursor if it lies outside min..max.
+   Returns 1 on success, 0 if it is inside the range or at the limit z->lb. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (min <= ch && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Match the s_size symbols at s against the text starting at the cursor.
+   On success the cursor moves past the match and 1 is returned; otherwise
+   0 is returned and the cursor is unchanged. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{   if (z->l - z->c < s_size) return 0;     /* not enough text left */
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Match the s_size symbols at s against the text ending at the cursor.
+   On success the cursor moves back to the start of the match and 1 is
+   returned; otherwise 0 is returned and the cursor is unchanged. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{   if (z->c - z->lb < s_size) return 0;    /* not enough text before cursor */
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward match against the whole symbol buffer p (length from SIZE(p)). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{   int p_len = SIZE(p);
+    return eq_s(z, p_len, p);
+}
+
+/* Backward match against the whole symbol buffer p (length from SIZE(p)). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{   int p_len = SIZE(p);
+    return eq_s_b(z, p_len, p);
+}
+
+/* Find the longest entry of the table v (v_size entries) that matches the
+   text starting at the cursor.
+
+   Phase 1 is a binary search over v, comparing symbol by symbol from the
+   start of each entry, so v must be ordered accordingly.  common_i and
+   common_j remember how many leading symbols are already known to match at
+   the low (i) and high (j) bounds, so comparisons resume from there.
+
+   Phase 2 starts at entry i and follows the substring_i links until an entry
+   is found that matched in full and whose optional function, if any,
+   returns non-zero.  The cursor is then left just past that entry and its
+   result is returned.  Returns 0 when the chain is exhausted. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; } /* text ran out: sorts lower */
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)  /* entry i matched in full */
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c + w->s_size;   /* undo any cursor movement by function */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;         /* fall back to the next shorter candidate */
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/* Find the longest entry of v (v_size entries) that matches the text ending
+   at the cursor.  Entries are compared from their last symbol backwards
+   against the symbols before the cursor, so v must be ordered by that
+   reversed comparison.  On success the cursor is left at the start of the
+   matched text and the entry's result is returned; 0 means no match. */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;      /* last symbol before the cursor */
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; } /* hit the backward limit */
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)  /* entry i matched in full */
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c - w->s_size;   /* undo any cursor movement by function */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;         /* fall back to the next shorter candidate */
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Move the buffer p into a new allocation with room for n + 20 symbols.
+   CAPACITY(p) symbols are copied and p is released; the new buffer is
+   returned.  Only the capacity field of the new buffer is filled in here,
+   so callers set the size themselves afterwards.
+   NOTE(review): the result of malloc() is not checked. */
+extern symbol * increase_size(symbol * p, int n)
+{   int capacity = n + 20;
+    char * raw = (char *) malloc(HEAD + (capacity + 1) * sizeof(symbol));
+    symbol * grown = (symbol *) (raw + HEAD);
+    CAPACITY(grown) = capacity;
+    memmove(grown, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return grown;
+}
+
+/* Replace the symbols z->p[c_bra..c_ket) by the s_size symbols at s.
+   When the length changes, the tail of the buffer is shifted (growing the
+   buffer first if needed), the stored size and z->l are updated, and the
+   cursor is moved along with the tail, or clamped to c_bra if it was inside
+   the replaced span.  Returns the change in length. */
+
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{   int old_len = SIZE(z->p);
+    int delta = s_size - (c_ket - c_bra);
+    if (delta != 0)
+    {   int new_len = delta + old_len;
+        if (new_len > CAPACITY(z->p)) z->p = increase_size(z->p, new_len);
+        /* shift everything from c_ket onwards to its new place */
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        if (z->c >= c_ket)
+            z->c += delta;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/* Sanity check before a slice operation: requires
+   0 <= bra <= ket <= l <= SIZE(p).  On violation a message goes to stderr,
+   the buffer is dumped with debug() and the process is terminated.
+   NOTE(review): exit(1) ends the whole process hosting this library. */
+static void slice_check(struct SN_env * z)
+{
+    if (z->bra < 0 ||
+        z->bra > z->ket ||
+        z->ket > z->l ||
+        z->l > SIZE(z->p))      /* this last test could be removed */
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+/* Replace the current slice z->p[bra..ket) by the s_size symbols at s,
+   after validating the slice bounds. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the contents of the symbol buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int p_len = SIZE(p);
+    slice_from_s(z, p_len, p);
+}
+
+/* Delete the current slice, i.e. replace it by an empty string. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, (symbol *) 0);
+}
+
+/* Replace z->p[bra..ket) by the s_size symbols at s and shift the slice
+   marks z->bra / z->ket by the change in length when they lie at or after
+   bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int shift = replace_s(z, bra, ket, s_size, s);
+    if (z->bra >= bra) z->bra += shift;
+    if (z->ket >= bra) z->ket += shift;
+}
+
+/* Same as insert_s(), with the replacement text taken from the symbol
+   buffer p (length from SIZE(p)). */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice z->p[bra..ket) into p, enlarging p when its
+   capacity is too small.  Returns p, which may have been reallocated, so
+   callers must use the returned pointer. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{   int len;
+    slice_check(z);
+    len = z->ket - z->bra;
+    if (len > CAPACITY(p)) p = increase_size(p, len);
+    memmove(p, z->p + z->bra, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into p, enlarging p when its capacity
+   is too small.  Returns p, which may have been reallocated. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{   int len = z->l;
+    if (len > CAPACITY(p))
+        p = increase_size(p, len);
+    memmove(p, z->p, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Print the buffer on stdout with markers at the positions of interest:
+   '{' at lb, '[' at bra, '|' at the cursor, ']' at ket and '}' at l.
+   Zero symbols are shown as '#'.  When number is non-negative the line is
+   prefixed with the number, the line count and the buffer size. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{   int pos;
+    int limit = SIZE(z->p);
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+    for (pos = 0; pos <= limit; pos++)
+    {   if (pos == z->lb) putchar('{');
+        if (pos == z->bra) putchar('[');
+        if (pos == z->c) putchar('|');
+        if (pos == z->ket) putchar(']');
+        if (pos == z->l) putchar('}');
+        if (pos < limit)
+        {   int ch = z->p[pos];
+            putchar(ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of tsearch2.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case a NUL-terminated string in place, one byte at a time with
+ * tolower(), and return the pointer that was passed in.
+ */
+char*
+lowerstr(char *str) {
+   unsigned char *cur;
+
+   for (cur = (unsigned char*)str; *cur != '\0'; cur++)
+       *cur = tolower(*cur);
+   return str;
+}
+
+/*
+ * Release a stop word list: every string in s->stop, then the pointer array
+ * itself, and finally zero the whole StopList (this also clears s->wordop).
+ *
+ * The array built by readstoplist() carries no NULL terminator, so only
+ * s->len bounds the walk.
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       /* check the count before touching *ptr: ptr[len] need not be readable */
+       while( s->len >0 ) {
+           free(*ptr);
+           ptr++; s->len--;
+       }
+       /* the array is freed exactly once, after its entries */
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Free a stop word array that has not been attached to a StopList yet:
+ * its first len strings, then the array itself.
+ */
+static void
+freepartialstop(char **stop, int len) {
+   if ( !stop )
+       return;
+   while( len>0 )
+       free(stop[--len]);
+   free(stop);
+}
+
+/*
+ * Load a stop word list into s from the file named by the text value in.
+ *
+ * Each non-empty line becomes one malloc'ed entry; if s->wordop is set it
+ * is applied to every entry.  A NULL or empty 'in' leaves s with len 0 and
+ * stop NULL.  The result is not sorted here.
+ *
+ * Reports elog(ERROR) when the file cannot be opened or memory runs out;
+ * in the latter case everything allocated so far is released first.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t  linelen=strlen(buf);
+
+           /*
+            * Drop the newline only when fgets() stored one: the last line
+            * of a file may lack it, and must not lose its final character.
+            */
+           if ( linelen>0 && buf[linelen-1]=='\n' )
+               buf[linelen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /* the new list lives only in 'stop' so far: free it here */
+                   freepartialstop(stop, s->len);
+                   s->len=0;
+                   freestoplist(s);
+                   fclose(hin); 
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+    
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               /* entries 0..len-1 are valid; the failed slot holds NULL */
+               freepartialstop(stop, s->len);
+               s->len=0;
+               freestoplist(s);
+               fclose(hin); 
+               elog(ERROR,"Not enough memory");
+           }
+           if ( s->wordop ) 
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++; 
+       }
+       fclose(hin);
+       pfree(filename); 
+   }
+   s->stop=stop;
+} 
+
+/*
+ * qsort()/bsearch() comparator for arrays of char*: each argument points
+ * at an array element, and the strings are ordered with strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   char *left = *(char**)a;
+   char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/*
+ * Sort the entries of a stop list in strcmp() order so that
+ * searchstoplist() can use bsearch().  Empty lists are left untouched.
+ */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true if key is present in the stop list.  When s->wordop is set
+ * it is applied to key first (even for an empty list).  The lookup is a
+ * bsearch(), so the list must already be in comparestr() order.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop ) 
+       key=(*(s->wordop))(key);
+   if ( !s->stop || s->len<=0 )
+       return false;
+   if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
+       return true;
+   return false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the configuration stored in the pg_ts_cfg row with oid id.
+ *
+ * Two SPI queries are run; their plans are prepared once and kept in the
+ * file-level statics plan_getcfg and plan_getmap.  The first fetches the
+ * parser name.  The second fetches (token id, dictionary names) rows
+ * ordered by token id descending, so the first row fixes the size of
+ * cfg->map.  Names are copied while TopMemoryContext is current
+ * (presumably so that they outlive SPI_finish()), then turned into ids
+ * with name2id_prs() / name2id_dict() and pfree'd.
+ *
+ * cfg->map and every dict_id array are malloc'ed; reset_cfg() frees them.
+ * All failures are reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second query: $1 is now the parser name (text), $2 stays the cfg oid */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /* rows arrive with the largest token id first: it sizes the map */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull now refers to column 2, the dictionary name array */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       /* dict_id temporarily holds copies of the names, not ids */
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every stored dictionary name by the dictionary's id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Backend-local cache of loaded configurations.
+ *   last_cfg    - entry returned by the most recent findcfg(), or NULL
+ *   len         - number of initialized entries in list
+ *   reallen     - number of entries list has room for
+ *   list        - entries kept sorted by id (see findcfg())
+ *   name2id_map - config name -> oid cache used by name2id_cfg()
+ */
+typedef struct {
+   TSCfgInfo   *last_cfg;
+   int     len;
+   int     reallen;
+   TSCfgInfo   *list;
+   SNMap       name2id_map;
+} CFGList;
+
+/* the single cache instance, initially empty */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name -> id map, every dict_id
+ * array, every map array and the list itself, then zero CList so that the
+ * next findcfg() / name2id_cfg() starts from scratch.
+ */
+void
+reset_cfg(void) {
+        freeSNMap( &(CList.name2id_map) );
+        if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+                free(CList.list);
+   }
+        memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly instead of subtracted: Oid is unsigned,
+ * so a difference squeezed into an int has the wrong sign once two ids are
+ * more than INT_MAX apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached TSCfgInfo for configuration id, loading it with
+ * init_cfg() on first use.
+ *
+ * Lookup order: the entry returned last time, then a bsearch() over the
+ * sorted list, then a fresh load appended to the list (grown by doubling,
+ * starting at 16 entries) followed by a re-sort.
+ *
+ * The returned pointer refers into CList.list and may be invalidated by a
+ * later findcfg() that grows or re-sorts the list, or by reset_cfg().
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo key;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   key.id=id;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+
+   /*
+    * Do not expose the new slot through last_cfg until it is complete:
+    * if init_cfg() reports an error part-way, a later call must not find
+    * a half-built entry whose id already matches.
+    */
+   CList.last_cfg=NULL;
+   init_cfg(id, &(CList.list[CList.len]));
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+
+   /* qsort changed order, so locate the entry again */
+   CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return CList.last_cfg;
+}
+
+
+/*
+ * Translate a configuration name (pg_ts_cfg.ts_name) into the oid of its
+ * pg_ts_cfg row.
+ *
+ * Results are cached in CList.name2id_map; on a miss the oid is looked up
+ * through SPI with a plan that is prepared once and kept in plan_name2id.
+ * Reports elog(ERROR) when the SPI calls fail, when no row matches, or
+ * when the oid comes back null.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   Oid id=findSNMap_t( &(CList.name2id_map), name );
+   
+   /* a non-zero id means the name was found in the cache */
+   if ( id ) 
+       return id;
+   
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* at most one row is requested */
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull ) 
+           elog(ERROR, "Null id for tsearch config");
+   } else 
+       elog(ERROR, "No tsearch config");
+   SPI_finish();
+   /* remember the answer for later calls */
+   addSNMap_t( &(CList.name2id_map), name, id );
+   return id;
+}
+
+
+/*
+ * Split buf (buflen bytes) into tokens with the parser of cfg, normalize
+ * each token with the dictionaries mapped to its type, and append the
+ * resulting lexemes to prs->words.
+ *
+ * For every token the dictionaries of cfg->map[type] are tried in order;
+ * the first one that returns a non-NULL array wins.  prs->pos is advanced
+ * once per such token, also when the array is empty, so positions keep
+ * counting over words that produce no lexeme.  The lexeme strings are
+ * stored in prs->words without copying; only the array holding them is
+ * pfree'd.  Token types outside the map are skipped.
+ *
+ * Reports elog(ERROR) for a token of MAXSTRLEN bytes or more.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an integer argument, so it is passed as int4 */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token to prs->words: the array is doubled until a slot is
+ * free, the slot is zeroed, and the token's type, length and a palloc'ed
+ * copy of its buflen bytes are stored in it.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* taken after the resize, so it points into the current array */
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) ); 
+   slot->type = (uint8)type;
+   slot->len = buflen; 
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;    
+}
+
+/*
+ * Link the most recently added word of prs to every VAL item of query
+ * whose operand equals the lexeme buf (buflen bytes).
+ *
+ * The first matching item is stored in the word itself.  Each further
+ * match appends a copy of the word with that item and the 'repeated' flag
+ * set.  Room for query->size extra entries is reserved up front, so the
+ * array never has to grow inside the matching loop.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* take the address only now: repalloc() above may have moved the array */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Headline variant of parsetext_v2(): split buf (buflen bytes) into tokens
+ * with the parser of cfg and record every token, whatever its type, in
+ * prs->words through hladdword().
+ *
+ * Tokens whose type is covered by cfg->map are additionally normalized by
+ * the first dictionary that returns a non-NULL array, and each resulting
+ * lexeme is matched against query with hlfinditem().  The lexemes and the
+ * array holding them are pfree'd here.
+ *
+ * Reports elog(ERROR) for a token of MAXSTRLEN bytes or more.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an integer argument, so it is passed as int4 */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from the words collected in prs.
+ *
+ * Only words with 'in' set and 'skip' / 'repeated' clear contribute: a word
+ * flagged 'replace' becomes a single space, any other word is copied as
+ * is, wrapped in prs->startsel / prs->stopsel when 'selected' is set.
+ * The word buffers of all non-repeated entries are pfree'd along the way,
+ * so prs->words must not be read through ->word afterwards.
+ *
+ * Returns a palloc'ed text value whose size field covers the header plus
+ * the bytes written.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   text *out;
+   int len=128;
+   char *ptr;
+   HLWORD  *wrd=prs->words;
+
+   out = (text*)palloc( len );
+   ptr=((char*)out) + VARHDRSZ;
+
+   while( wrd - prs->words < prs->curwords ) {
+       /* grow until the word plus both selection markers is sure to fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
+           int dist = ptr - ((char*)out);
+           len*= 2;
+           out = (text *) repalloc(out, len);
+           /* repalloc may move the buffer: rebase the write pointer */
+           ptr=((char*)out) + dist;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace ) {
+               *ptr=' ';
+               ptr++;
+           } else {
+               if (wrd->selected) {
+                   memcpy(ptr,prs->startsel,prs->startsellen);
+                   ptr+=prs->startsellen;
+               }
+               memcpy(ptr,wrd->word,wrd->len);
+               ptr+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(ptr,prs->stopsel,prs->stopsellen);
+                   ptr+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries were created by memcpy in hlfinditem(), so they share ->word with their original */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+
+       wrd++;
+   }
+
+   VARATT_SIZEP(out)=ptr - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the oid of the current configuration.
+ *
+ * A value set earlier (by set_curcfg() or by a previous call) is returned
+ * straight from current_cfg_id.  Otherwise the configuration whose
+ * pg_ts_cfg.locale equals the current LC_CTYPE locale is looked up through
+ * SPI, remembered in current_cfg_id and returned.  Reports elog(ERROR)
+ * when the SPI calls fail or no configuration matches the locale.
+ *
+ * NOTE(review): the oid is returned as int although it is stored as Oid,
+ * and isnull is not inspected after SPI_getbinval().
+ */
+int  
+get_currcfg(void) {
+   Oid arg[1]={ TEXTOID };
+   const char *curlocale;
+   Datum pars[1];
+   bool isnull;
+   int stat;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* a NULL second argument only queries the locale, it does not change it */
+   curlocale = setlocale(LC_CTYPE, NULL);
+   pars[0] = PointerGetDatum( char2text((char*)curlocale) );
+   stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1);
+
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 )
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   pfree(DatumGetPointer(pars[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current one.
+ * findcfg() is called first, so the configuration is loaded (and any load
+ * error is reported) before current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+        findcfg(PG_GETARG_OID(0));
+        current_cfg_id=PG_GETARG_OID(0);
+        PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current
+ * one.  The name is resolved with name2id_cfg() and the result is passed
+ * on to set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+   
+        DirectFunctionCall1(
+                set_curcfg,
+                ObjectIdGetDatum( name2id_cfg(name) )
+        );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();      
+}       
+
+/* SQL-callable: return the oid of the current configuration (see get_currcfg()). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   PG_RETURN_OID( get_currcfg() ); 
+}
+
+/*
+ * SQL-callable: announce that the tsearch caches were cleaned.
+ *
+ * NOTE(review): no reset_*() call is made here; the only action is
+ * ts_error(NOTICE, ...).  Presumably ts_error() itself drops the caches
+ * before reporting -- confirm against its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+/*
+ * Declarations for tsearch2 configurations (ts_cfg.c): the cached
+ * configuration description and the word buffers filled while parsing
+ * text for tsvector construction and for headline generation.
+ */
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries assigned to one token type, in the order they are tried. */
+typedef struct {
+   /* number of entries in dict_id */
+   int len;
+   /* dictionary ids, stored as Datums */
+   Datum   *dict_id;
+} ListDictionary;
+
+/* One loaded configuration, as filled in by init_cfg(). */
+typedef struct {
+   /* oid of the pg_ts_cfg row */
+   Oid id;
+   /* id of the parser, as returned by name2id_prs() */
+   Oid prs_id;
+   /* number of entries in map */
+   int len;
+   /* indexed by token type */
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One lexeme produced by parsetext_v2(). */
+typedef struct {
+        /* length of word in bytes */
+        uint16          len;
+   /* parsetext_v2() stores a single position in pos; apos is not used in ts_cfg.c */
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        char       *word;
+   /* set to 0 by parsetext_v2() */
+   uint32  alen;
+}       WORD;
+   
+/* Growable array of WORDs plus the running word position. */
+typedef struct {
+        WORD       *words;
+        /* allocated / used entries of words */
+        int4            lenwords;
+        int4            curwords;
+   /* incremented once per token that a dictionary recognized */
+   int4        pos;
+}       PRSTEXT;
+
+/* One token of the text a headline is generated from. */
+typedef struct {
+        /* length of word in bytes */
+        uint16    len;
+   /*
+    * Flags read by genhl(): a word is emitted when 'in' is set and 'skip'
+    * and 'repeated' are clear; 'replace' emits a single space instead of
+    * the word; 'selected' wraps the word in startsel/stopsel.  'repeated'
+    * marks the extra copies made by hlfinditem().
+    */
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   /* token type reported by the parser */
+   uint8   type;
+        /* token text, not NUL-terminated (see hladdword()) */
+        char      *word;
+   /* query item this token matches, or NULL */
+   ITEM      *item;
+}       HLWORD;
+   
+/* Growable array of HLWORDs plus the markers put around selected words. */
+typedef struct {
+        HLWORD       *words;
+        /* allocated / used entries of words */
+        int4            lenwords;
+        int4            curwords;
+        char           *startsel;
+        char            *stopsel;
+        /* byte lengths of startsel and stopsel */
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+
+/*
+ * Input function for tsstat.  The argument is not looked at: the result
+ * is always a freshly palloc'ed accumulator consisting of the header
+ * only, with zero entries.
+ */
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+/*
+ * Output function for tsstat: not implemented, always raises
+ * ERROR "Unimplemented".  The PG_RETURN_NULL() after elog() is only
+ * there to give the function a return statement.
+ */
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * Create or enlarge an array of WordEntry pointers.
+ *
+ * in   current array, or NULL if none has been allocated yet
+ * len  in/out: capacity in elements; set to 8 on first allocation and
+ *      doubled on every later call
+ *
+ * Returns the (possibly moved) array.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       /* array already exists: double its capacity */
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*)* (*len) );
+   }
+
+   /* first call: start with room for 8 pointers */
+   *len=8;
+   return palloc( sizeof(WordEntry*)* (*len) );
+}
+
+/*
+ * Order a stat entry against a tsvector word: shorter lexemes sort
+ * first, lexemes of equal length are ordered by strncmp() of their bytes.
+ * Returns <0, 0 or >0.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   /* same length: compare the lexeme bytes themselves */
+   return strncmp(
+       STATSTRPTR(stat) + a->pos,
+       STRPTR(txt) + b->pos,
+       a->len
+   );
+}
+
+/*
+ * formstat - build a new tsstat holding every entry of `stat` plus `len`
+ * new lexemes taken from `txt`.
+ *
+ * stat   existing accumulator (read only here, not freed)
+ * txt    tsvector the new lexemes belong to
+ * entry  array of `len` pointers to WordEntry items of txt that are not
+ *        present in stat; they have to arrive in compareStatWord() order
+ *        for the merge below to keep the entry array ordered
+ * len    number of elements in entry
+ *
+ * Returns a freshly palloc'ed tsstat.  The old string area is copied to
+ * the start of the new one, so the `pos` offsets of existing entries stay
+ * valid; strings of the new lexemes are appended behind it.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* bytes needed for the strings of the new lexemes */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   /* size must be set before STATSTRPTR(newstat) is used: the macro reads it */
+   newstat->size=nentry;
+
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new lexeme: binary-search its slot instead of merging */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       /* entries before the slot, the new entry, then the remaining ones */
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* merge the old entries with the new lexemes */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               /* a lexeme without position data still counts as one occurrence */
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries (possibly nothing) */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* whatever is left of the new lexemes (possibly nothing) */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+/*
+ * ts_accum(stat, txt) - fold one tsvector into the accumulator.
+ *
+ * For each lexeme of txt that is already in stat, ndoc is incremented
+ * and nentry grows by the lexeme's number of positions (at least 1);
+ * these counters are updated in place in the passed-in stat.  Lexemes
+ * not found are collected in newentry[] and added by formstat(), which
+ * returns a newly allocated tsstat.
+ *
+ * Returns stat itself when no new lexeme had to be added, otherwise the
+ * new tsstat; the old one is not freed here.
+ *
+ * NOTE(review): argument 1 is detoasted before PG_ARGISNULL(1) is
+ * tested, so the NULL test below only helps if the function is never
+ * called with a NULL tsvector - confirm it is always declared strict.
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both arrays in parallel when stat is less than
+    * 100 times the size of txt, otherwise binary-search stat for every
+    * lexeme of txt.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               /* no position data: count the lexeme once */
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* lexeme is missing from stat: remember it */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* stat exhausted: every remaining lexeme of txt is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   /* leaves StopLow < StopHigh, which marks "found" below */
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions below: a private copy
+ * of the accumulated statistics and the index of the next entry to emit.
+ * NOTE(review): stat is declared tsvector* although it holds a copy of a
+ * tsstat; the code only touches it through ->size and the STAT* macros -
+ * confirm the two structs share the same leading layout.
+ */
+typedef struct {
+   uint32  cur;
+   tsvector *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions.
+ *
+ * Inside the multi-call memory context: copies `stat` into a new
+ * StatStorage (iteration index 0), hangs it on funcctx->user_fctx, and
+ * prepares the tuple slot and attribute input metadata for the
+ * "statinfo" row type.  The previous memory context is restored on exit.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext   saved;
+   StatStorage     *storage;
+   TupleDesc       rowdesc;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* private copy of the statistics, kept across calls */
+   storage = palloc( sizeof(StatStorage) );
+   storage->stat = palloc( stat->len );
+   memcpy(storage->stat, stat, stat->len);
+   storage->cur = 0;
+   funcctx->user_fctx = (void*)storage;
+
+   /* result rows are built as "statinfo" tuples */
+   rowdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+
+/*
+ * Produce the next result row, or signal the end of the set.
+ *
+ * While entries remain, builds a tuple from three C strings (the lexeme,
+ * ndoc, nentry), advances the cursor and returns the tuple as a Datum.
+ * When all entries have been emitted, frees the stored statistics and
+ * the storage itself and returns (Datum)0.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *storage = (StatStorage*)funcctx->user_fctx;
+   StatEntry   *item;
+   HeapTuple   tup;
+   Datum       row;
+   char        *columns[3];
+   char        ndocbuf[16];
+   char        nentrybuf[16];
+
+   if ( !( storage->cur < storage->stat->size ) ) {
+       /* nothing left: release the per-query state */
+       pfree(storage->stat);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   item = STATPTR(storage->stat) + storage->cur;
+
+   /* column 0: NUL-terminated copy of the lexeme */
+   columns[0] = palloc( item->len+1 );
+   memcpy( columns[0], STATSTRPTR(storage->stat)+item->pos, item->len);
+   (columns[0])[item->len]='\0';
+
+   /* columns 1 and 2: the counters in text form */
+   sprintf(ndocbuf,"%d",item->ndoc);
+   columns[1] = ndocbuf;
+   sprintf(nentrybuf,"%d",item->nentry);
+   columns[2] = nentrybuf;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, columns);
+   row = TupleGetDatum(funcctx->slot, tup);
+
+   pfree(columns[0]);
+   storage->cur++;
+   return row;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+/*
+ * ts_accum_finish(stat) - set-returning function that turns an
+ * accumulated tsstat into rows.
+ *
+ * On the first call the tsstat argument is copied by
+ * ts_setup_firstcall(); every call then returns the next row from
+ * ts_process_call() until that yields (Datum)0, at which point the set
+ * is reported as done.
+ */
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* (Datum)0 from ts_process_call() means "no more rows" */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Oid of the tsvector type; looked up once by get_ti_Oid() and then reused. */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the Oid of the tsvector type in pg_type through SPI and store
+ * it in tiOid.  The caller must already be connected to SPI.
+ *
+ * Raises ERROR when the query fails, when it returns no row, or when the
+ * Oid found is InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is an unsigned row count, so it has to be tested
+    * against 1: a "<0" test can never fire and vals[0] of an empty
+    * result would be read below.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * ts_stat_sql - run the query given in txt and accumulate statistics
+ * over the tsvector values it returns.
+ *
+ * The query has to produce exactly one column and that column must be of
+ * type tsvector; otherwise ERROR is raised.  Rows are fetched through a
+ * cursor in batches of 100 and each non-NULL value is fed to ts_accum().
+ * The caller must already be connected to SPI.
+ *
+ * Returns the accumulated tsstat (an empty one if the query returned no
+ * non-NULL values).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from an empty accumulator */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       /* fold every row of the current batch into stat */
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum() hands back a new tsstat whenever it had to grow */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+/*
+ * ts_stat(query text) - set-returning function exposed to SQL as
+ * stat(text).
+ *
+ * On the first call the query text is executed via ts_stat_sql() inside
+ * an SPI connection and the resulting statistics are copied into the
+ * multi-call context by ts_setup_firstcall() before SPI_finish().
+ * Every call then returns the next row from ts_process_call() until it
+ * yields (Datum)0.
+ */
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       /* copy stat out while the SPI connection is still open */
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/* Statistics for one lexeme. */
+typedef struct {
+   uint32  len;    /* length of the lexeme in bytes */
+   uint32  pos;    /* offset of the lexeme inside the string area */
+   uint32  ndoc;   /* number of tsvectors the lexeme was found in */
+   uint32  nentry; /* accumulated number of occurrences */
+}  StatEntry;
+
+/*
+ * Accumulator: header, then `size` StatEntry items, then the string area
+ * that holds the lexemes (see the STAT* macros below).
+ */
+typedef struct {
+   int4        len;    /* total size of the structure in bytes */
+   int4        size;   /* number of StatEntry items */
+   char        data[1];    /* start of the variable-length part */
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat.  x is a pointer to a tsstat (any pointer
+ * type is accepted; it is cast internally).
+ *
+ * STATHDRSIZE       size of the fixed header (len + size)
+ * CALCSTATSIZE(n,l) total bytes for n entries and l bytes of strings
+ * STATPTR(x)        first StatEntry
+ * STATSTRPTR(x)     start of the string area; reads x->size
+ * STATSTRSIZE(x)    bytes in the string area; reads x->len and x->size
+ *
+ * Arguments are parenthesised so that expressions can be passed safely,
+ * and the casts use tsstat, the type declared in this header.
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance (ranking)
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary (pg_ts_dict is keyed by dict_name)
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dictionary
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of lexeme positions in the string and their lengths
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>        /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator for WordEntryPos items: ascending order of ->pos.
+ *
+ * Equal positions compare as 0.  A comparator that reports "greater" for
+ * equal keys is inconsistent (cmp(a,b) > 0 and cmp(b,a) > 0 at the same
+ * time), which qsort() is not required to tolerate.  Equal positions are
+ * merged afterwards by uniquePos(), so their relative order is irrelevant.
+ */
+static int
+comparePos(const void *a, const void *b) {
+   int pa = ((const WordEntryPos *) a)->pos;
+   int pb = ((const WordEntryPos *) b)->pos;
+
+   if ( pa == pb )
+       return 0;
+   return ( pa > pb ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ * When a position occurs more than once the largest weight is kept.
+ * Compaction stops once MAXNUMPOS items are kept or the kept position
+ * equals MAXENTRYPOS-1.  Returns the number of items kept.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *last = a;     /* last item kept so far */
+   WordEntryPos *scan;
+
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (scan = a + 1; scan - a < l; scan++) {
+       if ( scan->pos == last->pos ) {
+           /* same position again: only the weight may be upgraded */
+           if ( scan->weight > last->weight )
+               last->weight = scan->weight;
+           continue;
+       }
+       last++;
+       last->pos = scan->pos;
+       last->weight = scan->weight;
+       if ( last-a >= MAXNUMPOS-1 || last->pos == MAXENTRYPOS-1 )
+           break;
+   }
+   return last + 1 - a;
+}
+
+/* Text buffer that the entry offsets refer to; set before qsort() is called */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN items: shorter lexemes first, lexemes
+ * of equal length ordered by strncmp() of their bytes in BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *ea = (const WordEntryIN *) a;
+   const WordEntryIN *eb = (const WordEntryIN *) b;
+
+   if ( ea->entry.len != eb->entry.len )
+       return ( ea->entry.len > eb->entry.len ) ? 1 : -1;
+
+   return strncmp(BufferStr + ea->entry.pos,
+                  BufferStr + eb->entry.pos,
+                  ea->entry.len);
+}
+
+/*
+ * Sort the l parsed entries of a[] by lexeme and merge duplicates in place.
+ *
+ * a         - entries; entry.pos/entry.len address the lexeme text in buf,
+ *             ->pos (when entry.haspos) is a palloc'd array whose first slot
+ *             holds a uint16 counter followed by that many positions
+ * l         - number of entries
+ * buf       - buffer holding the lexeme text
+ * outbuflen - output only: number of bytes needed to store the surviving
+ *             lexemes (SHORTALIGNed) and their position data (counter slot
+ *             included), i.e. exactly what tsvector_in() copies
+ *
+ * Position lists of duplicate lexemes are concatenated and then cleaned up
+ * with uniquePos().  Returns the number of entries kept.
+ *
+ * The size is computed from zero.  Previously the multi-entry path added to
+ * whatever the caller passed in and left out the counter slot of every
+ * positioned entry, so the result depended on the caller's scratch size and
+ * could be too small for many short lexemes with positions; the single-entry
+ * path did not set the size at all for an entry without positions.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   *outbuflen = 0;
+   if (l < 1)
+       return 0;
+
+   res = a;
+   if (l == 1) {
+       *outbuflen = SHORTALIGN(res->entry.len);
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen += (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
+       }
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;    /* compareentry() reads the text through this */
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* new lexeme: finish the previous one, then start a new slot */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* duplicate lexeme carrying positions: fold them into res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* res had none: take over the duplicate's array */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+   /* account for the last surviving entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/* Parser states used by gettoken_tsvector() (stored in state->state) */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Make room in state->word for at least one more byte: when the write
+ * cursor (state->curpos) is within one byte of the end, the buffer size
+ * (state->len) is doubled and the cursor is re-based on the new buffer.
+ * Expects a variable named "state" in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Read the next lexeme, and optionally its position list, from
+ * state->prsbuf.
+ *
+ * The unescaped lexeme is stored NUL-terminated in state->word (grown on
+ * demand by RESIZEPRSBUF); state->curpos is left on the terminator, so the
+ * lexeme length is state->curpos - state->word.  A lexeme is either a bare
+ * word or a '...'-quoted string; a backslash makes the next character
+ * literal.  If ":pos[weight][,pos...]" follows and state->oprisdelim is
+ * false, state->alen becomes non-zero and state->pos holds the positions:
+ * the first array slot is used as a uint16 counter, the positions follow
+ * from index 1.  When state->oprisdelim is true, ISOPERATOR characters and
+ * ':' end a word and no position info is parsed.
+ *
+ * Returns 1 when a lexeme was read and 0 at end of input; malformed input
+ * is reported with elog(ERROR).
+ *
+ * Input bytes are cast to unsigned char before being passed to the
+ * <ctype.h> functions: handing them a negative plain char (any byte >= 0x80
+ * where char is signed) is undefined behaviour.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;   /* state to resume after an escaped char */
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           /* skipping blanks in front of a lexeme */
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           /* character following a backslash is copied verbatim */
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           /* inside a bare (unquoted) word */
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1;
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           /* inside a '...'-quoted lexeme */
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* step over the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           /* just after a closing quote: ':' starts the position list */
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           /* a position number must start here */
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2;
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           /* remaining digits of the number, a weight letter, ',' or the end */
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type: parse the cstring argument into a
+ * tsvector.
+ *
+ * Tokens are read with gettoken_tsvector(); their text is collected in a
+ * scratch buffer (tmpbuf) and their descriptors in arr[].  uniqueentry()
+ * then sorts and de-duplicates them and reports, through buflen, how many
+ * payload bytes are needed.  Finally the lexemes and their position data
+ * are laid out after the WordEntry array of the result.
+ *
+ * Raises elog(ERROR) for a lexeme of MAXSTRLEN bytes or more, for text that
+ * extends past MAXSTRPOS, and for syntax errors found by the tokenizer.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the descriptor array and the text scratch buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       if (cur - tmpbuf > MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           /* the position array is handed over to the entry */
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* empty input: no payload, not the scratch-buffer size */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /* lexeme text first, then (optionally) counter slot + positions */
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos );
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * SQL-callable: number of entries (->size) of the tsvector argument.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before the detoasted copy (if any) is released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type: render the value as
+ *     'lexeme':pos[weight],pos[weight] 'lexeme' ...
+ * Weights 3, 2 and 1 are printed as A, B and C; weight 0 prints nothing.
+ *
+ * Inside a lexeme both the quote and the backslash are written with a
+ * leading backslash.  gettoken_tsvector() treats a backslash as "take the
+ * next character literally", so a lexeme containing a raw backslash did not
+ * survive an output/input round trip when only the quote was escaped.
+ *
+ * The buffer is sized up front for the worst case: two quotes and a
+ * separator per entry, every lexeme byte escaped (len*2), and 7 bytes per
+ * position (up to 5 digits, a weight letter, a comma; the ':' takes the
+ * place of the comma that the last position does not need).  No further
+ * reallocation is therefore required while writing.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += ptr[i].len*2 /*for escape */;
+       if ( ptr[i].haspos )
+           lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           /* escape the two characters the parser treats specially */
+           if (*curin == '\'' || *curin == '\\')
+               *curout++ = '\\';
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0:
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD items: by length, then by the bytes of the
+ * word, then by position.
+ *
+ * Items with the same word and the same position compare as 0; returning
+ * -1 for them (in both argument orders) made the comparator inconsistent.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   if (((WORD *) a)->len == ((WORD *) b)->len) {
+       int res = strncmp(
+                      ((WORD *) a)->word,
+                      ((WORD *) b)->word,
+                      ((WORD *) b)->len);
+       if ( res==0 ) {
+           if ( ((WORD *) a)->pos.pos == ((WORD *) b)->pos.pos )
+               return 0;
+           return ( ((WORD *) a)->pos.pos > ((WORD *) b)->pos.pos ) ? 1 : -1;
+       }
+       return res;
+   }
+   return (((WORD *) a)->len > ((WORD *) b)->len) ? 1 : -1;
+}
+
+/*
+ * Sort the l words of a[] and merge repeated words in place.
+ *
+ * For every surviving word a position array is allocated in ->pos.apos:
+ * apos[0] is the number of positions, apos[1..] are the positions (each
+ * clamped with LIMITPOS), and ->alen is the allocated size in uint16s.
+ * The ->word string of every merged duplicate is pfree'd.  Returns the
+ * number of words kept.
+ *
+ * The original position is always read before ->pos.apos is assigned
+ * (pos.pos and pos.apos presumably share storage -- confirm against the
+ * WORD declaration).
+ *
+ * A duplicate whose position equals the last one already stored is not
+ * stored again.  The words arrive sorted by position, so this keeps the
+ * list free of repeats, as uniquePos() does for tsvector_in().
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   if (l == 1) {
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* a new word: open the next result slot */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* the same word again: keep only its position */
+           tmppos=LIMITPOS(ptr->pos.pos);
+           pfree(ptr->word);
+           if ( res->pos.apos[0] < MAXNUMPOS-1 &&
+                res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 &&
+                res->pos.apos[ res->pos.apos[0] ] != tmppos ) {
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = tmppos;
+               res->pos.apos[0]++;
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * Builds a tsvector from the words collected in prs.  The words are first
+ * sorted and merged with uniqueWORD(); the result is then sized and filled:
+ * a WordEntry per word, followed by the word text (SHORTALIGNed) and, when
+ * the word has positions, a uint16 counter plus that many WordEntryPos
+ * items, all with weight 0.
+ *
+ * Every word string, every position array and prs->words itself are
+ * pfree'd here.  Raises elog(ERROR) when the text area grows past MAXSTRPOS.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   /* first pass: number of payload bytes behind the WordEntry array */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   /* second pass: copy text and positions, releasing the sources as we go */
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       if (cur - str > MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* apos[0] is the position count; it becomes the counter slot */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * SQL-callable: to_tsvector(cfg id, text).
+ * Parses the text with the configuration found by findcfg() and returns
+ * the resulting tsvector; text that yields no words gives an empty
+ * tsvector (header only, size 0).
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *doc = PG_GETARG_TEXT_P(1);
+   TSCfgInfo *cfg = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     prs;
+   tsvector       *result = NULL;
+
+   /* start with room for 32 words */
+   prs.pos = 0;
+   prs.curwords = 0;
+   prs.lenwords = 32;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(doc), VARSIZE(doc) - VARHDRSZ);
+   PG_FREE_IF_COPY(doc, 1);
+
+   if (!prs.curwords) {
+       /* nothing parsed: hand back an empty value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+       PG_RETURN_POINTER(result);
+   }
+
+   result = makevalue(&prs);
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * SQL-callable: to_tsvector(cfg name, text).
+ * Resolves the configuration name with name2id_cfg() and delegates to
+ * to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * SQL-callable: to_tsvector(text).
+ * Delegates to to_tsvector() with the id returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum cfgid = Int32GetDatum( get_currcfg() );
+   Datum result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * TEXTOID.  Returns the oid of the first such candidate, or InvalidOid if
+ * there is none.  The candidate list is pfree'd node by node.
+ */
+static Oid
+findFunc(char *fname) {
+   List *qualname = makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(qualname, 1);
+   Oid found = InvalidOid;
+
+   freeList(qualname);
+
+   while (cand) {
+       FuncCandidateList next = cand->next;
+
+       /* only the first text-argument candidate is remembered */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * Row-level BEFORE INSERT/UPDATE trigger that fills a tsvector column.
+ * Trigger arguments: tgargs[0] names the tsvector column, every further
+ * argument names either a column of the relation or, if no such column
+ * exists, a function that findFunc() can resolve.
+ *
+ * Columns are parsed with the configuration returned by
+ * findcfg(get_currcfg()); all words are accumulated in one PRSTEXT and
+ * turned into a single tsvector, which replaces the target column in the
+ * returned tuple.  If no words were found an empty tsvector is stored.
+ *
+ * Once a function argument has been seen, funcoid stays set, so the
+ * function is applied to the value of every column named after it.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* the tuple to modify: the inserted row, or the new version on update */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: the argument must name a function instead */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * If detoasting made no copy, remember the pointer in
+            * txt_toasted so that the pfree() below is skipped.
+            * NOTE(review): the function result itself is never freed here.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free only a detoasted copy, never the datum taken from the tuple */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store an empty tsvector (header only) */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Descriptor of one lexeme, packed into 32 bits:
+ *   haspos - set when position data is stored after the lexeme text
+ *   len    - length of the lexeme text in bytes
+ *   pos    - offset of the lexeme text within the string area (STRPTR)
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/* One occurrence of a lexeme: a 2-bit weight and a 14-bit position */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+/* clamp a position to the largest value that fits into WordEntryPos.pos */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * The tsvector value itself:
+ *   len  - total size of the value in bytes
+ *   size - number of WordEntry items
+ *   data - the WordEntry array followed by the string area
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Layout helpers for a tsvector value.  x is a pointer to the value, e a
+ * pointer to one of its WordEntry items.  All macro arguments are
+ * parenthesized so that expressions (a+b, p+1, ...) can be passed safely.
+ *
+ *   DATAHDRSIZE   size of the two int4 header fields
+ *   CALCDATASIZE  total size for x entries and lenstr payload bytes
+ *   ARRPTR        the WordEntry array
+ *   STRPTR        the string area that follows the array
+ *   STRSIZE       size of the string area
+ *   _POSDATAPTR   position data of e: a uint16 counter placed at the
+ *                 SHORTALIGNed end of the lexeme text
+ *   POSDATALEN    number of positions of e (0 when e has none)
+ *   POSDATAPTR    first WordEntryPos of e, just past the counter
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * A parsed entry while a tsvector is being read: the descriptor plus a
+ * separately allocated position array (valid when entry.haspos is set).
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/* State of the tokenizer gettoken_tsvector() */
+typedef struct
+{
+   char       *prsbuf;         /* input text, advanced as it is consumed */
+   char       *word;           /* buffer receiving the current lexeme */
+   char       *curpos;         /* write cursor inside word */
+   int4        len;            /* allocated size of word */
+   int4        state;          /* current parser state */
+   int4        alen;           /* allocated items in pos; 0 = no positions */
+   WordEntryPos    *pos;       /* positions of the current lexeme */
+   bool        oprisdelim;     /* operators and ':' terminate a word */
+}  TI_IN_STATE;
+
+/* returns 1 when a lexeme was read into state->word, 0 at end of input */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>        /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * SQL-callable: return a copy of the tsvector argument without any
+ * position data.  Every entry keeps its lexeme text; haspos is cleared.
+ *
+ * Both loops run over all in->size entries (the loop conditions had been
+ * mangled to "isize").
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* payload size: the lexeme texts only, each SHORTALIGNed */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * SQL-callable: return a copy of the tsvector argument in which every
+ * position carries the weight named by the second argument
+ * ('a'/'A' = 3, 'b' = 2, 'c' = 1, 'd' = 0).  Any other character raises
+ * elog(ERROR, "Unknown weight").
+ *
+ * The character is cast to unsigned char for tolower(); passing a negative
+ * plain char to a <ctype.h> function is undefined behaviour.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   /* work on a private copy; the layout is unchanged, only weights differ */
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two lexemes that may live in different tsvectors: ptra/ptrb are
+ * the string areas that a->pos / b->pos are offsets into.  Shorter lexemes
+ * sort first; equal lengths are ordered by strncmp() of the bytes.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position data
+ * of destptr (an entry of dest), shifting each by maxpos and clamping it
+ * with LIMITPOS.  Weights are copied unchanged.
+ *
+ * If destptr has no positions yet its counter is started at 0; haspos is
+ * set once at least one position was added.  Returns the number of
+ * positions added.
+ *
+ * The loop header had been truncated to "for(i=0; i".  It walks the slen
+ * source positions and additionally stops when the destination holds
+ * MAXNUMPOS positions or its last position is already saturated at
+ * MAXENTRYPOS-1, the same limits uniquePos() applies.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos )
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS &&
+            ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight;
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1;
+   return  *clen - startlen;
+}
+
+
+/*
+ * SQL-callable: concatenate two tsvectors.
+ *
+ * The entries of both inputs are merged in compareEntry() order.  Entries
+ * of the first input keep their position data; positions taken from the
+ * second input are shifted by the largest position found in the first
+ * input (via add_pos).  A lexeme present in both inputs yields one entry
+ * holding the positions of both.
+ *
+ * The result is allocated with the combined size of the inputs and is
+ * shrunk logically at the end: size and len are recomputed and the string
+ * area is moved down to directly follow the entries actually used.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* largest position used in in1: the offset for positions of in2 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   /* provisional size: STRPTR(out) depends on it while the data is written */
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one entry, positions combined */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* counter slot already copied: only the added items count */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* leftovers of in1 are copied as they are */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* leftovers of in2 get their positions shifted by maxpos */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /* fix up the header and close the gap left by merged entries */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- WARNING: this script uninstalls tsearch2.
+-- Be careful: the DROP ... CASCADE statements below also drop all indices,
+-- triggers and columns that use the types defined in tsearch2.sql.
+-- Everything runs inside one transaction (BEGIN ... END).
+
+
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types; CASCADE removes dependent objects as well
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- remaining functions
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the token
+ * type number defined in deflex.h (1..LASTNUM).  Entry 0 is an empty
+ * placeholder because token numbering starts at 1.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token type number
+ * defined in deflex.h (1..LASTNUM), in the same order as lex_descr[].
+ * Entry 0 is an empty placeholder because token numbering starts at 1.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Token type numbers returned by the default word parser.
+ *
+ * Remember: LASTNUM must stay equal to the highest token type number
+ * below, and lex_descr[] / tok_alias[] need one entry per number
+ * (plus the unused entry 0).
+ */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* per-token-type description and alias tables, indexed by the numbers above */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+#include <stdio.h>     /* FILE, needed by start_parse_fh() */
+
+/*
+ * Interface of the flex-generated word parser (wordparser/parser.l).
+ *
+ * NOTE(review): token and tokenlen are declared here without "extern", so
+ * every file including this header gets its own tentative definition and
+ * the build relies on the linker merging them.  Switching to "extern"
+ * needs a matching definition of tokenlen in parser.l - confirm and fix
+ * both places together.
+ */
+
+/* text of the token found by the last tsearch2_yylex() call */
+char      *token;
+/* length of that token in bytes */
+int            tokenlen;
+
+/* scan the next token; returns its type number from deflex.h, 0 at end of input */
+int            tsearch2_yylex(void);
+/* start scanning the first "limit" bytes of a string */
+void       start_parse_str(char *, int);
+/* start scanning a file handle; limit 0 means read without limit */
+void       start_parse_fh(FILE *, int);
+/* release the scanner buffer created by start_parse_*() */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * The generated scanner reports fatal errors with fprintf() followed by
+ * exit(); route the message to ts_error() instead so that exit() is
+ * avoided.
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* let the generated scanner use the postgres allocation functions */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+/* pointer to the current token */
+char *token = NULL;
+/* copy of a WHOLE hyphenated word (or url), returned before its parts */
+char *s     = NULL;
+
+/* buffer being parsed; needed for parsing from a string */
+YY_BUFFER_STATE buf = NULL;
+
+/* limit for reading from a filehandle: bytes left, -1 means unlimited */
+int lrlimit = -1;
+/* size of the next fread() request issued by YY_INPUT */
+int bytestoread = 0;
+
+/*
+ * Replacement for flex's YY_INPUT that honours lrlimit.
+ * Interactive buffers are read character by character up to a newline.
+ * Otherwise nothing is read once lrlimit has reached 0; while lrlimit is
+ * positive it caps (and is reduced by) the size of each fread() request;
+ * a negative lrlimit reads max_size bytes at a time.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+       int c = '*', n; \
+       for ( n = 0; n < max_size && \
+               (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+           buf[n] = (char) c; \
+       if ( c == '\n' ) \
+           buf[n++] = (char) c; \
+       if ( c == EOF && ferror( tsearch2_yyin ) ) \
+           YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       result = n; \
+   } else if ( lrlimit == 0 ) { \
+       result=YY_NULL; \
+   } else { \
+       if ( lrlimit>0 ) { \
+           bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+           lrlimit -= bytestoread; \
+       } else { \
+           bytestoread = max_size; \
+       } \
+       if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+               && ferror( tsearch2_yyin ) ) \
+           YY_FATAL_ERROR( "input in flex scanner failed" ); \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after parsing: free the saved copy of a composite token (if
+ * any) and delete the scanner buffer.
+ * NOTE(review): s and buf are allocated through the palloc-mapped
+ * malloc/strdup above; presumably they can dangle if an error interrupts
+ * a parse before this function runs - verify.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+} 
+
+/*
+ * Start parsing the first "limit" bytes of str.  A scanner buffer left
+ * over from a previous parse is released first.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL )
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Start parsing from a filehandle.  limit is the number of bytes YY_INPUT
+ * may request from fh; 0 means no limit.  A scanner buffer left over from
+ * a previous parse is released first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL )
+       end_parse();
+
+   if ( limit != 0 )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Load the parser whose pg_ts_parser row has the given oid into *prs.
+ *
+ * *prs is zeroed first.  The row is fetched through SPI with a plan that
+ * is prepared once and kept in plan_getparser.  The start, nexttoken, end
+ * and headline functions are resolved with fmgr_info_cxt() in
+ * TopMemoryContext; prs_lextype is stored as a plain Oid.  prs_id is set
+ * last, after all lookups.
+ *
+ * Errors (SPI failure, no such parser) are reported via ts_error(ERROR).
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       /* columns 1..5 follow the order of the select list above */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned: %u keeps large oids from printing as negative */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/*
+ * Cache of the parsers already loaded by findprs(), plus the
+ * name -> oid map filled by name2id_prs().
+ */
+typedef struct {
+   WParserInfo *last_prs;  /* result of the most recent lookup, or NULL */
+   int     len;            /* number of used entries in list */
+   int     reallen;        /* number of allocated entries in list */
+   WParserInfo *list;      /* realloc'ed array, kept sorted by prs_id */
+   SNMap       name2id_map;    /* parser name -> oid */
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole parser cache: the name -> oid map, the array of loaded
+ * parsers, and every counter/pointer in PList.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL ) {
+       free(PList.list);
+   }
+
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort()/bsearch() comparator: orders WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is
+ * unsigned, so a difference squeezed into the int return value gets the
+ * wrong sign when two ids are more than 2^31 apart.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached WParserInfo for parser "id", loading it with
+ * init_prs() on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search in the
+ * sorted cache array, and finally a load into a new array slot followed
+ * by a re-sort.  The array is managed with realloc() and grows by
+ * doubling, starting at 16 entries.  Errors (unknown parser, out of
+ * memory) are reported via ts_error(ERROR).
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo key;
+   WParserInfo *slot;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* already used prs */
+   key.prs_id=id;
+   if ( PList.len != 0 ) {
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /*
+    * last chance: load the parser.  last_prs is cleared first and only set
+    * again after init_prs() has succeeded; otherwise a failed load would
+    * leave it pointing at a zeroed, unused slot (prs_id == 0) that a later
+    * findprs(0) would return as a valid entry.
+    */
+   PList.last_prs = NULL;
+
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   slot=&(PList.list[PList.len]);
+   init_prs(id, slot);
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+
+   /* qsort changed order, so locate the new entry again */
+   PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return PList.last_prs;
+}
+
+/* saved SPI plan for the name -> oid query, prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * Names resolved earlier are answered from PList.name2id_map; otherwise
+ * the table is queried through SPI and the result is added to the map.
+ * Errors (SPI failure, unknown name) are reported via ts_error(ERROR).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid     argtypes[1]={ TEXTOID };
+   bool    isnull;
+   Datum   values[1]={ PointerGetDatum(name) };
+   int     rc;
+   Oid     id;
+
+   /* fast path: name seen before */
+   id = findSNMap_t( &(PList.name2id_map), name );
+   if ( id != 0 )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed == 0 )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   SPI_finish();
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* state kept between calls of the token_type*() set-returning functions */
+typedef struct {
+   int     cur;        /* index of the next list entry to return */
+   LexDescr    *list;  /* result of the parser's lextype function; process_call stops at lexid == 0 */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type*() functions: look up the
+ * parser, call its lextype function, and store the resulting list plus
+ * the "tokentype" tuple machinery in funcctx.  Everything after the
+ * parser lookup is allocated in the multi-call memory context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo     *parser = findprs(prsid);
+   MemoryContext   savedctx;
+   TypeStorage     *storage;
+   TupleDesc       rowdesc;
+
+   savedctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   storage=(TypeStorage*)palloc( sizeof(TypeStorage) );
+   storage->cur=0;
+   storage->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) )
+   );
+   funcctx->user_fctx = (void*)storage;
+
+   rowdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(savedctx);
+}
+
+/*
+ * Per-call worker of the token_type*() functions.
+ *
+ * Returns the next (id, alias, description) row as a tuple Datum and
+ * frees that entry's alias/descr strings.  When the list is absent or its
+ * terminating entry (lexid == 0) is reached, the stored state is freed
+ * and (Datum)0 is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *storage=(TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char        *fields[3];
+   char        idbuf[16];
+   HeapTuple   tuple;
+   Datum       result;
+
+   /* nothing left: release the state and signal the end */
+   if ( storage->list == NULL || storage->list[storage->cur].lexid == 0 ) {
+       if ( storage->list != NULL )
+           pfree(storage->list);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   entry = &(storage->list[storage->cur]);
+   sprintf(idbuf,"%d",entry->lexid);
+   fields[0]=idbuf;
+   fields[1]=entry->alias;
+   fields[2]=entry->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(fields[1]);
+   pfree(fields[2]);
+   storage->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: token types of the parser whose oid is
+ * argument 0, one row per call.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext *funcctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call(funcctx);
+
+   /* (Datum)0 from process_call() means the list is exhausted */
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: token types of the parser whose name is
+ * argument 0, one row per call.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext *funcctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *name = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call(funcctx);
+
+   /* (Datum)0 from process_call() means the list is exhausted */
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: token types of the current parser, one row per
+ * call.  When no current parser has been chosen yet, the parser named
+ * "default" becomes the current one.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext *funcctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call(funcctx);
+
+   /* (Datum)0 from process_call() means the list is exhausted */
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+
+/*
+ * Make the parser with the oid given as argument 0 the current parser.
+ * findprs() is called first, so the parser is looked up (and loaded into
+ * the cache) before its id is stored.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid prsid = PG_GETARG_OID(0);
+
+   findprs(prsid);
+   current_parser_id = prsid;
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Make the parser named by argument 0 the current parser: the name is
+ * translated to an oid and handed to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+   text    *name = PG_GETARG_TEXT_P(0);
+   Oid     prsid = name2id_prs(name);
+
+   DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* one parsed token: its type number and a NUL-terminated copy of its text */
+typedef struct {
+   int type;
+   char    *lexem;
+} LexemEntry;
+
+/* state kept between calls of the parse*() set-returning functions */
+typedef struct {
+   int cur;            /* index of the next list entry to return */
+   int len;            /* allocated size while filling, number of tokens afterwards */
+   LexemEntry  *list;  /* tokens collected by prs_setup_firstcall() */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse*() functions: run the parser over
+ * the whole text at once and keep every (type, lexeme) pair, so that
+ * later calls only hand out stored entries.
+ *
+ * The parser is driven through its start / getlexeme / end functions;
+ * getlexeme returning 0 ends the loop.  The list starts with room for 16
+ * entries and doubles when full.  Everything after the parser lookup is
+ * allocated in the multi-call memory context.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, int prsid, text *txt) {
+   WParserInfo     *parser = findprs(prsid);
+   MemoryContext   savedctx;
+   PrsStorage      *storage;
+   TupleDesc       rowdesc;
+   char            *lex=NULL;
+   int             llen=0;
+   int             type=0;
+
+   savedctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   storage=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   storage->cur=0;
+   storage->len=16;
+   storage->list=(LexemEntry*)palloc( sizeof(LexemEntry)*storage->len );
+
+   parser->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(parser->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   for(;;) {
+       LexemEntry  *entry;
+
+       type=DatumGetInt32(FunctionCall3(
+           &(parser->getlexeme_info),
+           PointerGetDatum(parser->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen)));
+       if ( type == 0 )
+           break;
+
+       /* grow the list when it is full */
+       if ( storage->cur>=storage->len ) {
+           storage->len=2*storage->len;
+           storage->list=(LexemEntry*)repalloc(storage->list, sizeof(LexemEntry)*storage->len);
+       }
+
+       /* lex is not NUL-terminated: copy llen bytes and terminate */
+       entry = &(storage->list[storage->cur]);
+       entry->lexem = palloc(llen+1);
+       memcpy( entry->lexem, lex, llen);
+       entry->lexem[llen]='\0';
+       entry->type=type;
+       storage->cur++;
+   }
+
+   FunctionCall1(
+       &(parser->end_info),
+       PointerGetDatum(parser->prs)
+   );
+
+   /* from here on len is the token count and cur the read position */
+   storage->len=storage->cur;
+   storage->cur=0;
+
+   funcctx->user_fctx = (void*)storage;
+   rowdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(savedctx);
+}
+
+/*
+ * Per-call worker of the parse*() functions.
+ *
+ * Returns the next stored (type, lexeme) pair as a tuple Datum and frees
+ * that lexeme.  Once all entries have been returned the stored state is
+ * freed and (Datum)0 is returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *storage=(PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char        *fields[2];
+   char        typebuf[16];
+   HeapTuple   tuple;
+   Datum       result;
+
+   /* nothing left: release the state and signal the end */
+   if ( storage->cur >= storage->len ) {
+       if ( storage->list != NULL )
+           pfree(storage->list);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   entry = &(storage->list[storage->cur]);
+   sprintf(typebuf,"%d",entry->type);
+   fields[0]=typebuf;
+   fields[1]=entry->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(fields[1]);
+   storage->cur++;
+   return result;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokens of the text in argument 1 as produced by
+ * the parser whose oid is argument 0, one row per call.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext *funcctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *txt = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(funcctx);
+
+   /* (Datum)0 from prs_process_call() means all tokens were returned */
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokens of the text in argument 1 as produced by
+ * the parser whose name is argument 0, one row per call.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext *funcctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *name = PG_GETARG_TEXT_P(0);
+       text    *txt = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(funcctx);
+
+   /* (Datum)0 from prs_process_call() means all tokens were returned */
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokens of the text in argument 0 as produced by
+ * the current parser, one row per call.  When no current parser has been
+ * chosen yet, the parser named "default" becomes the current one.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext *funcctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *txt = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(funcctx);
+
+   /* (Datum)0 from prs_process_call() means all tokens were returned */
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+
+/*
+ * headline(cfg oid, text, query [, options]): build a headline for the
+ * text with respect to the query.
+ *
+ * The text is parsed into words with hlparsetext(), the headline function
+ * of the configuration's parser is called on them, and genhl() produces
+ * the resulting text.  The options argument is optional; a missing or
+ * NULL-pointer fourth argument is passed on as NULL.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo   *cfg;
+   text        *in;
+   QUERYTYPE   *query;
+   text        *opt;
+   WParserInfo *prsobj;
+   HLPRSTEXT   prs;
+   text        *out;
+
+   cfg = findcfg(PG_GETARG_OID(0));
+   in = PG_GETARG_TEXT_P(1);
+   query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   if ( PG_NARGS()>3 && PG_GETARG_POINTER(3) )
+       opt = PG_GETARG_TEXT_P(3);
+   else
+       opt = NULL;
+   prsobj = findprs(cfg->prs_id);
+
+   /* start with room for 32 words */
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt != NULL )
+       PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+
+/*
+ * headline(cfg name, text, query [, options]): translate the
+ * configuration name to its oid and delegate to headline().  A missing
+ * options argument is passed on as a NULL pointer.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text    *cfg = PG_GETARG_TEXT_P(0);
+   Oid     cfgid = name2id_cfg( cfg );
+   Datum   optarg;
+   Datum   out;
+
+   if ( PG_NARGS()>3 )
+       optarg = PG_GETARG_DATUM(3);
+   else
+       optarg = PointerGetDatum(NULL);
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(cfgid),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       optarg
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+
+/*
+ * headline(text, query [, options]): delegate to headline() with the
+ * configuration returned by get_currcfg().  A missing options argument is
+ * passed on as a NULL pointer.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum   optarg;
+   Datum   out;
+
+   if ( PG_NARGS()>2 )
+       optarg = PG_GETARG_DATUM(2);
+   else
+       optarg = PointerGetDatum(NULL);
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       optarg
+   );
+   PG_RETURN_DATUM(out);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/*
+ * One loaded parser: its pg_ts_parser oid and the functions named by the
+ * prs_start, prs_nexttoken, prs_end, prs_headline and prs_lextype columns
+ * of that row (filled in by init_prs).
+ */
+typedef struct {
+   Oid prs_id;                 /* oid of the pg_ts_parser row */
+   FmgrInfo start_info;        /* prs_start */
+   FmgrInfo getlexeme_info;    /* prs_nexttoken */
+   FmgrInfo end_info;          /* prs_end */
+   FmgrInfo headline_info;     /* prs_headline */
+   Oid lextype;                /* prs_lextype, kept as a function oid */
+   void *prs;                  /* pointer returned by the start function */
+} WParserInfo;
+
+/* load parser "id" from pg_ts_parser into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached parser "id", loading it on first use */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* drop the parser cache */
+void   reset_prs(void);
+
+
+/*
+ * Description of one token type.  A parser's lextype function returns an
+ * array of these; an entry with lexid == 0 ends the array.
+ */
+typedef struct {
+   int lexid;
+   char    *alias;
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_lextype: return a palloc'd array describing token types 1..LASTNUM.
+ * Slot k-1 describes type k (id, copy of tok_alias[k], copy of
+ * lex_descr[k]); one extra trailing slot with lexid = 0 ends the array.
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr   *list = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1));
+   LexDescr   *slot = list;
+   int         type;
+
+   for (type = 1; type <= LASTNUM; type++, slot++) {
+       slot->lexid = type;
+       slot->alias = pstrdup(tok_alias[type]);
+       slot->descr = pstrdup(lex_descr[type]);
+   }
+
+   /* slot now points at index LASTNUM: the terminator entry */
+   slot->lexid = 0;
+
+   PG_RETURN_POINTER(list);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_start: pass the buffer pointer (argument 0) and its int32 length
+ * (argument 1) to start_parse_str().  Always returns a NULL pointer.
+ */
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char   *buf = (char *) PG_GETARG_POINTER(0);
+   int     buflen = PG_GETARG_INT32(1);
+
+   start_parse_str(buf, buflen);
+
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_getlexeme: run the lexer once via tsearch2_yylex() and report the
+ * result.  The token pointer and token length are stored through the
+ * pointers passed as arguments 1 and 2; the token type is the return value.
+ * Argument 0 is not used.
+ */
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char  **lexeme = (char **) PG_GETARG_POINTER(1);
+   int    *lexemelen = (int *) PG_GETARG_POINTER(2);
+   int     toktype;
+
+   /* the lexer must run before token/tokenlen are read */
+   toktype = tsearch2_yylex();
+   *lexeme = token;
+   *lexemelen = tokenlen;
+
+   PG_RETURN_INT32(toktype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_end: finish the current parse by calling end_parse().
+ * Argument 0 is not used; the function returns void.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   end_parse();
+
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-type classes, expressed as lists of numeric token type ids.
+ * NOTE(review): the ids are presumably the ones defined in
+ * wordparser/deflex.h - confirm there what each number stands for.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* types for which prsd_headline() sets the "replace" flag on a word */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* types that prsd_headline() does not count as words */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* types that prsd_headline() avoids as the last token of a headline */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* Window of headline words passed to TS_execute() as its check value. */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * TS_execute() callback: true when some word inside the hlCheck window
+ * refers to the query operand "val".
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window = (hlCheck *) checkval;
+   HLWORD  *word = window->words;
+   int     remaining = window->len;
+
+   while ( remaining-- > 0 ) {
+       if ( word->item == val )
+           return true;
+       word++;
+   }
+
+   return false;
+}
+
+
+/*
+ * hlCover: find the next "cover" in prs->words - a span of words that
+ * contains query operands and for which the query evaluates to true.
+ *
+ * On entry *p is the first word index to consider.  On success the function
+ * returns true with *p / *q set to the first / last word index of the cover.
+ * It returns false when no further cover exists.
+ *
+ * NOTE: *q == 0 doubles as the "nothing matched" marker, so a match found
+ * only at word index 0 is reported as no match.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* right border: the farthest of the first occurrences (at or after pos) of each operand */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q==0 )
+       return false;
+
+   /* left border: scanning back from *q, the nearest of the last occurrences of each operand */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       /* evaluate the query against the words of the candidate span only */
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* span does not satisfy the query: retry one word further right */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_headline: choose the fragment of the parsed text that becomes the
+ * headline and flag its words.
+ *
+ * Arguments: 0 - HLPRSTEXT* with the parsed words, 1 - text* option string
+ * (may be NULL), 2 - QUERYTYPE* query.  Returns the same HLPRSTEXT*.
+ *
+ * Recognised options (keys compared case-insensitively):
+ *   MaxWords  (default 35), MinWords (default 15), ShortWord (default 3),
+ *   StartSel  (default "<b>"), StopSel (default "</b>").
+ * Raises ERROR unless 0 < MinWords < MaxWords and ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is stretched or shrunk towards the
+ * MinWords..MaxWords range and the candidate containing the most distinct
+ * query words (preferring one that ends on a "good" token) is kept.  If no
+ * cover exists, the headline is the first MinWords words of the text.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults, overridable through opt */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       /* the map is terminated by an entry with a NULL key */
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       /* poslen<bestlen is false while bestlen is -1, so words[beste] is never read with beste == -1 */
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* a better candidate is already known, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* cover is too long: shorten it */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this candidate when it is the first one, when it holds more
+        * query words and ends on a good token, or when it ends on a good
+        * token while the current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: fall back to the leading min_words words */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen fragment */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* default highlighting markers when the options did not set them */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+           if ( res < 0 )
+               res = wpos( &(post[j]) );
+           else
+               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+       }
+   }
+   return res;
+}
+
+/*
+ * calc_rank: rank tsvector t against query q using the weight table w.
+ *
+ * Returns 0 when either the vector or the query is empty.  Otherwise the
+ * AND-ranking is used when the first query item is the '&' operator and the
+ * OR-ranking in every other case; a negative result is replaced by 1e-20.
+ * "method" selects the normalisation: 0 none, 1 divide by log(length),
+ * 2 divide by length; any other value raises ERROR.
+ *
+ * NOTE(review): method 1 divides by log(cnt_length(t)), which is 0 for a
+ * length of 1 - confirm whether callers can hit that.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+   ITEM   *root;
+   float   score;
+
+   if ( t->size == 0 || q->size == 0 )
+       return 0.0;
+
+   root = GETQUERY(q);
+   if ( root->type != VAL && root->val == (int4) '&' )
+       score = calc_rank_and(w,t,q);
+   else
+       score = calc_rank_or(w,t,q);
+
+   if ( score < 0 )
+       score = 1e-20;
+
+   if ( method == 1 )
+       score /= log((float)cnt_length(t));
+   else if ( method == 2 )
+       score /= (float)cnt_length(t);
+   else if ( method != 0 )
+       elog(ERROR,"Unknown normalization method: %d",method);
+
+   return score;
+}
+
+/*
+ * rank: rank a tsvector (argument 1) against a query (argument 2) with
+ * caller-supplied weights (argument 0, float4 array).  An optional fourth
+ * argument selects the normalisation method (default DEF_NORM_METHOD).
+ *
+ * The array must be one-dimensional and hold at least lengthof(weights)
+ * elements.  A negative element selects the built-in weights[] value for
+ * that slot; an element above 1.0 raises ERROR.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+   ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   float res=0.0;
+   float ws[ lengthof(weights) ];
+   int i;
+
+   if ( ARR_NDIM(win) != 1 ) 
+       elog(ERROR,"Array of weight is not one dimentional");
+   if ( ARRNELEMS(win) < lengthof(weights) )
+       elog(ERROR,"Array of weight is too short");
+
+   /* copy the caller's weights, substituting defaults for negative entries */
+   for(i=0;i<lengthof(weights);i++) {
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 ) 
+           elog(ERROR,"Weight out of range");
+   } 
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method); 
+       
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank_def: rank a tsvector (argument 0) against a query (argument 1) using
+ * the built-in weights[] table.  When called with three arguments the third
+ * one selects the normalisation method; otherwise DEF_NORM_METHOD is used.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *qry = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int norm = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+   float score;
+
+   score = calc_rank(weights, vec, qry, norm);
+
+   PG_FREE_IF_COPY(vec, 0);
+   PG_FREE_IF_COPY(qry, 1);
+   PG_RETURN_FLOAT4(score);
+}
+
+
+/* One occurrence of a query operand in the document: operand + position. */
+typedef struct {
+   ITEM    *item;
+   int32   pos;
+} DocRepresentation;
+
+/*
+ * qsort() comparator ordering DocRepresentation entries by ascending pos.
+ * Equal positions compare as 0: qsort() requires a consistent ordering, and
+ * answering "greater" for both (a,b) and (b,a) violates that contract.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   int32   apos = ((const DocRepresentation *) a)->pos;
+   int32   bpos = ((const DocRepresentation *) b)->pos;
+
+   if ( apos == bpos )
+       return 0;
+   return ( apos > bpos ) ? 1 : -1;
+}
+
+
+/* Slice of a DocRepresentation array passed to TS_execute() as check value. */
+typedef struct {
+   DocRepresentation *doc;
+   int len;
+}  ChkDocR;
+
+/*
+ * TS_execute() callback: true when the query operand "val" occurs among the
+ * len entries of the ChkDocR slice.
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *slice = (ChkDocR *) checkval;
+   int     k;
+
+   for (k = 0; k < slice->len; k++)
+       if ( slice->doc[k].item == val )
+           return true;
+
+   return false;
+}
+
+
+/*
+ * Cover: find the next cover in the position-sorted array doc[0..len-1],
+ * i.e. a span of entries for which the query evaluates to true.
+ *
+ * *pos is the index in doc[] where the search starts and is advanced past
+ * the start of the span that was examined.  On success the function returns
+ * true with *p / *q holding the first / last word position of the cover.
+ * The incoming *q is used to detect a span that was already examined.
+ * Returns false when no further cover exists.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* right border: farthest of the first occurrences (from *pos on) of each operand */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               } 
+               break;
+           } 
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   } 
+
+   /* left border: scanning back from lastpos, nearest of the last occurrences of each operand */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+ 
+   if ( *p<=*q ) {
+       /* evaluate the query against the entries of the candidate span only */
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { 
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ 
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q); 
+   }
+ 
+   return false;
+}
+
+/*
+ * get_docrep: build the list of all occurrences of the query's operands in
+ * txt, sorted by position.
+ *
+ * Returns a palloc'd array with *doclen entries, or NULL (with *doclen = 0)
+ * when no operand of the query occurs in txt.  Operands whose word entry
+ * carries no position data are given the positions stored in POSNULL.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   /* first uint16 of POSNULL holds the number of positions that follow it */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until the dimt new entries fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+   
+   if ( cur>0 ) {
+       if ( cur>1 ) 
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+   
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd: cover-based ranking of a tsvector (argument 1) against a query
+ * (argument 2).  Argument 0 is K, the cover length up to which a cover
+ * scores a full 1.0; longer covers score K / length.  K <= 0 selects 4.
+ * When called with four arguments the last one selects the normalisation
+ * method (0 none, 1 divide by log(length), 2 divide by length; anything
+ * else raises ERROR); otherwise DEF_NORM_METHOD is used.
+ * Returns 0 when no query operand occurs in the vector.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   score=0.0;
+   int first=0,last=0,doclen,scanpos=0;
+
+   doc = get_docrep(txt, query, &doclen);
+   if ( doc == NULL ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   if ( K <= 0 )
+       K = 4;
+
+   /* add up the contribution of every cover found in the document */
+   while( Cover(doc, doclen, query, &scanpos, &first, &last) ) {
+       int span = last-first+1;
+
+       score += ( span > K ) ? ((float)K)/((float)span) : 1.0;
+   }
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   if ( method == 1 )
+       score /= log((float)cnt_length(txt));
+   else if ( method == 2 )
+       score /= (float)cnt_length(txt);
+   else if ( method != 0 )
+       elog(ERROR,"Unknown normalization method: %d",method);
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(score);
+}
+
+
+/*
+ * rank_cd_def: rank_cd() without an explicit K.  -1 is passed as K and the
+ * tsvector / query arguments are forwarded; the normalisation method is the
+ * third argument when three arguments were given, DEF_NORM_METHOD otherwise.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum   norm;
+
+   if ( PG_NARGS() == 3 )
+       norm = PG_GETARG_DATUM(2);
+   else
+       norm = Int32GetDatum(DEF_NORM_METHOD);
+
+   PG_RETURN_DATUM( DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       norm
+   ));
+}
+
+/**************debug*************/
+
+/*
+ * One word occurrence used by get_covers(): pointer to the word and its
+ * length, its position, and the numbers of the covers that start / finish
+ * at this occurrence (0 when none does).
+ */
+typedef struct {
+   char    *w;
+   int2    len;
+   int2    pos;
+   int2    start;
+   int2    finish;
+} DocWord;
+
+/*
+ * qsort() comparator ordering DocWord entries by ascending pos.
+ * Equal positions compare as 0: qsort() requires a consistent ordering, and
+ * answering "greater" for both (a,b) and (b,a) violates that contract.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   int2    apos = ((const DocWord *) a)->pos;
+   int2    bpos = ((const DocWord *) b)->pos;
+
+   if ( apos == bpos )
+       return 0;
+   return ( apos > bpos ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers (debug aid): return the words of a tsvector (argument 0) in
+ * position order as text, with every cover of the query (argument 1) marked
+ * as "{N " before its first word and "}N " after its last word, N being the
+ * cover's ordinal number starting at 1.
+ *
+ * Returns an empty text when no query operand occurs in the vector.
+ * Raises ERROR "No pos info" when a word of the vector has no positions.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences in the vector */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size (word + space) */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* record on the sorted words where each cover starts and finishes */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       while(dwptr->pos < p && dwptr-dw<dlen)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr->pos < q+1 && dwptr-dw<dlen)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   /* emit the words in position order with the cover markers around them */
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/* Node of the pointer-linked form of a query tree. */
+typedef struct NODE
+{
+   struct NODE *left;
+   struct NODE *right;
+   ITEM       *valnode;
+}  NODE;
+
+/*
+ * Build the pointer-linked tree for the flat query that starts at "in".
+ * The right operand of an operator is the item directly after it; the left
+ * operand (absent for '!') lies in->left items further on.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *result = (NODE *) palloc(sizeof(NODE));
+
+   result->valnode = in;
+   result->left = NULL;
+   result->right = NULL;
+
+   if (in->type != OPR)
+       return result;
+
+   result->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       result->left = maketree(in + in->left);
+
+   return result;
+}
+
+/* Growable output buffer used while flattening a tree. */
+typedef struct
+{
+   ITEM       *ptr;
+   int4        len;
+   int4        cur;
+}  PLAINTREE;
+
+/*
+ * Append the subtree rooted at "node" to state in flat form and free the
+ * node.  The buffer is doubled when full.  For '!' the left offset is set
+ * to 1; for a binary operator it is set to the distance from the operator
+ * to its left operand, which is written after the whole right subtree.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   int4        self;
+
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+
+   /* remember the slot by index: recursion below may move the buffer */
+   self = state->cur;
+   memcpy((void *) &(state->ptr[self]), (void *) node->valnode, sizeof(ITEM));
+   state->cur++;
+
+   if (node->valnode->type != VAL)
+   {
+       if (node->valnode->val == (int4) '!')
+       {
+           state->ptr[self].left = 1;
+           plainnode(state, node->right);
+       }
+       else
+       {
+           plainnode(state, node->right);
+           state->ptr[self].left = state->cur - self;
+           plainnode(state, node->left);
+       }
+   }
+
+   pfree(node);
+}
+
+/*
+ * Flatten a pointer-linked tree into a palloc'd ITEM array (the tree nodes
+ * are freed on the way).  *len receives the number of items written.
+ * Returns NULL with *len = 0 when root is NULL or its item is neither a
+ * value nor an operator.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   flat;
+
+   flat.ptr = NULL;
+   flat.cur = 0;
+   flat.len = 16;
+
+   if (root != NULL &&
+       (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       flat.ptr = (ITEM *) palloc(flat.len * sizeof(ITEM));
+       plainnode(&flat, root);
+   }
+
+   *len = flat.cur;
+   return flat.ptr;
+}
+
+/* Free a whole subtree; a NULL node is accepted and ignored. */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the recursive call itself skips absent children */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Remove every '!' subtree from the tree, treating a removed subtree as
+ * "always true".  Returns the cleaned subtree, or NULL when the whole
+ * subtree disappears:
+ *   - a value node is kept as is;
+ *   - a '!' node is dropped together with its operand;
+ *   - a '|' node is dropped entirely as soon as one operand is dropped;
+ *   - an '&' node is replaced by its surviving operand, or dropped when
+ *     both operands are dropped.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   NODE       *survivor;
+
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   if (node->valnode->val == (int4) '|')
+   {
+       /* the right side is only cleaned when the left side survived */
+       node->left = clean_NOT_intree(node->left);
+       if (node->left != NULL)
+           node->right = clean_NOT_intree(node->right);
+
+       if (node->left == NULL || node->right == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+       return node;
+   }
+
+   /* operator & */
+   node->left = clean_NOT_intree(node->left);
+   node->right = clean_NOT_intree(node->right);
+   if (node->left != NULL && node->right != NULL)
+       return node;
+
+   /* at most one operand is left; it takes the place of the operator */
+   survivor = (node->left != NULL) ? node->left : node->right;
+   pfree(node);
+   return survivor;
+}
+
+/*
+ * Return a flat copy of the query starting at ptr with all '!' subtrees
+ * removed (see clean_NOT_intree); *len receives the number of items.
+ * The result is NULL with *len = 0 when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *cleaned = clean_NOT_intree(maketree(ptr));
+
+   return plaintree(cleaned, len);
+}
+
+/* Three-valued outcome reported by clean_fakeval_intree() for a subtree. */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Remove from the query tree the VALTRUE nodes, i.e. values that are
+ * always present in the text (stopwords).
+ *
+ * Returns the cleaned subtree, or NULL when the whole subtree was removed;
+ * in the latter case *result tells whether the removed subtree is always
+ * true (V_TRUE) or always false (V_FALSE).  *result is not written when a
+ * subtree is returned, so the caller must preset it to V_UNKNOWN.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* operand vanished: the negation vanishes too, with the inverted value */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           /* OR with an always-true side is always true */
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           /* always-false left side: the right operand replaces the OR */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           /* always-false right side: the left operand replaces the OR */
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       /* any other operator is handled as AND */
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           /* AND with an always-false side is always false */
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           /* always-true left side: the right operand replaces the AND */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           /* always-true right side: the left operand replaces the AND */
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a flat copy of the query starting at ptr with the always-true
+ * (stopword) values removed; *len receives the number of items.
+ * When the whole query collapses to a constant, a NOTICE is emitted and
+ * NULL is returned with *len = 0.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &verdict);
+
+   if (verdict == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/*
+ * Both functions take a flat query (array of ITEM) and return a newly
+ * allocated flat query, storing its item count in *len; the result may be
+ * NULL with *len = 0.
+ */
+/* drops every '!' subtree from the query */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+/* drops the always-true (stopword) values from the query */
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/* qsort()/bsearch() comparator: order SNMapEntry items by key via strcmp(). */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const SNMapEntry *lhs = (const SNMapEntry *) a;
+   const SNMapEntry *rhs = (const SNMapEntry *) b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add the pair (key, value) to the map.  The key is duplicated with
+ * strdup(); the entry array is grown with realloc() (16 entries at first,
+ * doubled afterwards) and re-sorted after every insertion so that
+ * findSNMap() can use bsearch().  Raises ERROR "No memory" when an
+ * allocation fails.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if ( map->len >= map->reallen ) {
+       int newcap = ( map->reallen ) ? 2*map->reallen : 16;
+       SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newcap);
+
+       if ( grown == NULL )
+           elog(ERROR, "No memory");
+       map->reallen = newcap;
+       map->list = grown;
+   }
+
+   slot = &map->list[ map->len ];
+   slot->key = strdup(key);
+   if ( slot->key == NULL )
+       elog(ERROR, "No memory");
+   slot->value = value;
+   map->len++;
+
+   if ( map->len > 1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Same as addSNMap() for a key given as text: the key is converted with
+ * text2char() and the temporary string is freed afterwards.
+ */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey = text2char( key );
+
+   addSNMap(map, ckey, value);
+   pfree(ckey);
+}
+
+/*
+ * Look up key in the map with bsearch().  Returns the stored value, or 0
+ * when the map is empty or the key is not present.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry  probe;
+   SNMapEntry *hit;
+
+   if ( map->len==0 || !map->list )
+       return 0;
+
+   probe.key = key;
+   probe.value = 0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( hit == NULL )
+       return 0;
+
+   return hit->value;
+}
+
+/*
+ * Same as findSNMap() for a key given as text: the key is converted with
+ * text2char() and the temporary string is freed afterwards.
+ * Returns the stored value, or 0 when the key is not present.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* kept as Oid: a signed int cannot represent every Oid value */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release every key and the entry array of the map, then zero the SNMap
+ * structure itself so that it can be reused.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *cur;
+
+       for ( cur = map->list; map->len; cur++, map->len-- ) {
+           if ( cur->key )
+               free(cur->key);
+       }
+       free( map->list );
+   }
+
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One map entry: a string key and the Oid stored under it. */
+typedef struct {
+   char    *key;
+   Oid value;
+} SNMapEntry;
+
+/*
+ * The map itself: len entries in use out of reallen allocated in list.
+ * snmap.c keeps list sorted by key and manages it with realloc()/free().
+ */
+typedef struct {
+   int len;
+   int reallen;
+   SNMapEntry  *list;
+} SNMap;
+
+/* add a (key, value) pair; the key is copied */
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+/* look a key up; 0 is returned when it is not present */
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+/* release all entries and reset the map to all-zero */
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a stemmer environment with S_size string slots, I_size integer
+ * slots and B_size symbol (boolean) slots; every string slot and the
+ * current-word buffer p are created with create_s().
+ *
+ * Returns the new environment, or NULL when one of the calloc() requests
+ * made here fails (anything allocated up to that point is released again).
+ * A successful result must be released with SN_close_env().
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    /* a *_size field is only set once its array exists, so SN_close_env()
+       releases exactly what has been allocated so far */
+    SN_close_env(z);
+    return NULL;
+}
+
+/*
+ * Release an environment created by SN_create_env(): the string slots and
+ * their array, the integer and symbol arrays, the current-word buffer and
+ * finally the environment itself.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    int i;
+
+    if (z->S_size)
+    {
+        for (i = 0; i < z->S_size; i++)
+            lose_s(z->S[i]);
+        free(z->S);
+    }
+
+    if (z->I_size)
+        free(z->I);
+
+    if (z->B_size)
+        free(z->B);
+
+    if (z->p)
+        lose_s(z->p);
+
+    free(z);
+}
+
+/*
+ * Make the "size" symbols at s the current word of the environment: the
+ * range 0..z->l of the buffer is replaced through replace_s() and the
+ * cursor z->c is reset to 0.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    const int from = 0;
+
+    replace_s(z, from, z->l, size, s);
+
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/*
+ * Stemmer environment, created by SN_create_env() and released by
+ * SN_close_env().
+ */
+struct SN_env {
+    /* current-word buffer; created with create_s(), loaded by SN_set_current() */
+    symbol * p;
+    /* SN_set_current() resets c to 0 and replaces the range 0..l of p.
+       NOTE(review): a, lb, bra and ket are presumably further cursor/limit
+       and slice marks used by the generated stemmers - confirm in header.h */
+    int c; int a; int l; int lb; int bra; int ket;
+    /* number of elements in S, I and B respectively */
+    int S_size; int I_size; int B_size;
+    /* S_size strings, each created with create_s() */
+    symbol * * S;
+    /* I_size zero-initialised integers */
+    int * I;
+    /* B_size zero-initialised symbols */
+    symbol * B;
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+/* Forward declarations.  english_stem() is the stemming entry point; each
+ * r_* routine is a file-local step of the algorithm that returns 1 or 0.
+ */
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+/* Creation and release of an environment sized for this stemmer. */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+/* a_0: the single prefix "gener", searched forwards by r_mark_regions().
+ * Entries follow struct among:
+ * { length, string, substring_i, result, function }.
+ */
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+/* a_1: suffixes searched backwards by r_Step_1a(); the result field picks
+ * the case of its switch (-1 means "matched, nothing to rewrite").
+ */
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+/* a_2: endings tested by r_Step_1b() after it has deleted a suffix.
+ * Entry 0 is the empty string (result 3); result 1 marks at/bl/iz and
+ * result 2 marks the doubled consonants.
+ */
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+/* a_3: suffixes searched backwards by r_Step_1b(): result 1 for eed/eedly,
+ * result 2 for ed/ing/edly/ingly.
+ */
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+/* a_4: suffixes searched backwards by r_Step_2(); the result field (1..16)
+ * selects the rewrite that r_Step_2() applies.
+ */
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+/* a_5: suffixes searched backwards by r_Step_3(); the result field (1..6)
+ * selects the rewrite that r_Step_3() applies.
+ */
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+/* a_6: suffixes searched backwards by r_Step_4(); result 2 ("ion") gets an
+ * extra test there, every other entry has result 1.
+ */
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+/* a_7: the final letters 'e' (result 1) and 'l' (result 2), searched
+ * backwards by r_Step_5().
+ */
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+/* a_8: words searched backwards by r_exception2(); every entry has
+ * result -1 because the caller only needs to know whether one matched.
+ */
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+/* a_9: words searched forwards by r_exception1().  Result -1 leaves the
+ * word unchanged; results 1..11 select the replacement r_exception1()
+ * writes.
+ */
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+/* Grouping tables passed to in_grouping()/out_grouping() and their _b
+ * variants along with a [min, max] symbol range.
+ * NOTE(review): presumably bitmaps with one bit per symbol code starting
+ * at min - confirm against the runtime.
+ * g_v is always used with the range 97..121 ('a'..'y').
+ */
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+/* g_v_WXY: used by r_shortv() with the range 89..121 ('Y'..'y'). */
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+/* g_valid_LI: used by r_Step_2() with the range 99..116 ('c'..'t'). */
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+/* Literal strings passed by the routines below to eq_s(), eq_s_b(),
+ * slice_from_s() and insert_s().  They carry no terminator; the length is
+ * given separately at each call site.
+ */
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+/* Prelude: mark selected 'y' letters by rewriting them as 'Y'.
+ * Clears B[0] (Y_found) and then
+ *   - rewrites a 'y' at the cursor when the symbol after it passes the
+ *     in_grouping(g_v) test, and
+ *   - repeatedly scans forward for a g_v symbol directly followed by 'y'
+ *     and rewrites that 'y'.
+ * B[0] is set to 1 whenever a rewrite happens.  The cursor is restored
+ * before returning; the result is always 1.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2; /* end of word: stop repeating */
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1: /* emitted by the generator; no goto in this function targets it */
+        z->c = c;
+    }
+    return 1;
+}
+/* Compute the two marks p1 (I[0]) and p2 (I[1]) used by r_R1()/r_R2().
+ * Both start out as z->l.  p1 becomes the cursor position reached either
+ * by matching the a_0 prefix or by going past an in_grouping(g_v) match
+ * and then an out_grouping(g_v) match; p2 is found by doing the same
+ * two-step scan once more from p1.  Hitting the end of the word at any
+ * point leaves the marks not yet assigned at z->l.  The cursor is
+ * restored; the result is always 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+/* Backward test used by r_Step_1b() and r_Step_5().  Returns 1 when,
+ * reading backwards from the cursor, either
+ *   - out_grouping_b(g_v_WXY), in_grouping_b(g_v), out_grouping_b(g_v)
+ *     succeed in that order, or
+ *   - out_grouping_b(g_v), in_grouping_b(g_v) succeed and the cursor has
+ *     then reached the backward limit lb.
+ * Otherwise returns 0.  The cursor is not restored here; callers do that.
+ */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m; /* rewind before trying the second alternative */
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* R1 test: 1 when the cursor is at or beyond mark p1 (I[0]), else 0. */
+static int r_R1(struct SN_env * z) {
+    return z->I[0] <= z->c;
+}
+
+/* R2 test: 1 when the cursor is at or beyond mark p2 (I[1]), else 0. */
+static int r_R2(struct SN_env * z) {
+    return z->I[1] <= z->c;
+}
+/* Step 1a: handle the a_1 suffixes at the end of the word.
+ *   result 1 (sses)    : replace with "ss".
+ *   result 2 (ied/ies) : replace with "ie" when exactly one symbol
+ *                        precedes the suffix, otherwise with "i".
+ *   result 3 (s)       : delete, provided an in_grouping_b(g_v) match is
+ *                        found somewhere before the symbol that precedes
+ *                        the 's'; otherwise return 0.
+ *   result -1 (ss/us)  : leave the word alone.
+ * Returns 0 when no suffix matches, 1 otherwise (except as noted).
+ */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+/* Step 1b: handle the a_3 suffixes at the end of the word.
+ *   result 1 (eed/eedly): replace with "ee" when the suffix is in R1.
+ *   result 2 (ed/ing/edly/ingly): require an in_grouping_b(g_v) match
+ *     somewhere before the suffix, delete the suffix, then look at the
+ *     new ending through a_2:
+ *       1 (at/bl/iz)  : insert 'e' at the cursor;
+ *       2 (doubled)   : delete the last symbol;
+ *       3 (otherwise) : insert 'e' when the cursor is exactly at p1 (I[0])
+ *                       and r_shortv() succeeds.
+ * Returns 0 as soon as one of these conditions fails, 1 otherwise.
+ */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+/* Step 1c: replace a final 'y' or 'Y' with 'i'.  The letter must be
+ * preceded by a symbol that passes out_grouping_b(g_v), and that symbol
+ * must not be the first one of the word (the cursor must still be above
+ * lb after stepping over it).  Returns 1 after the replacement, 0 when
+ * any condition fails.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/* Step 2: rewrite an a_4 suffix that lies in R1.
+ * Result codes 1..15 each map to a fixed replacement string; code 13
+ * additionally requires an 'l' before the suffix, and code 16 deletes the
+ * suffix when the symbol before it passes in_grouping_b(g_valid_LI).
+ * Returns 0 when nothing matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_2(struct SN_env * z) {
+    /* Replacement for each result code; slot 0 is never used. */
+    static const struct { int len; symbol * text; } subst[16] = {
+        { 0, 0 },
+        { 4, s_13 }, { 4, s_14 }, { 4, s_15 }, { 4, s_16 }, { 3, s_17 },
+        { 3, s_18 }, { 3, s_19 }, { 2, s_20 }, { 3, s_21 }, { 3, s_22 },
+        { 3, s_23 }, { 3, s_24 }, { 2, s_26 }, { 3, s_27 }, { 4, s_28 }
+    };
+    int code;
+
+    z->ket = z->c; /* [, line 89 */
+    code = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (code == 0) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+
+    if (code == 16) {
+        if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+        slice_del(z); /* delete, line 111 */
+        return 1;
+    }
+    if (code == 13 && !(eq_s_b(z, 1, s_25))) return 0;
+    if (code >= 1 && code <= 15)
+        slice_from_s(z, subst[code].len, subst[code].text);
+    return 1;
+}
+
+/* Step 3: rewrite an a_5 suffix that lies in R1.
+ * Codes 1..4 substitute a shorter ending, code 5 deletes the suffix, and
+ * code 6 deletes it only when it also lies in R2.
+ * Returns 0 when nothing matches or a region test fails, 1 otherwise.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int code;
+
+    z->ket = z->c; /* [, line 116 */
+    code = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (code == 0) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+
+    if (code == 1) {
+        slice_from_s(z, 4, s_29); /* <-, line 117 */
+    } else if (code == 2) {
+        slice_from_s(z, 3, s_30); /* <-, line 118 */
+    } else if (code == 3) {
+        slice_from_s(z, 2, s_31); /* <-, line 119 */
+    } else if (code == 4) {
+        slice_from_s(z, 2, s_32); /* <-, line 121 */
+    } else if (code == 5 || code == 6) {
+        if (code == 6 && !r_R2(z)) return 0; /* call R2, line 125 */
+        slice_del(z); /* delete, lines 123 and 125 */
+    }
+    return 1;
+}
+/* Step 4: delete an a_6 suffix that lies in R2.  For result 2 ("ion") the
+ * suffix is deleted only when the symbol before it is 's' or 't'.
+ * Returns 0 when nothing matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+/* Step 5: drop a final 'e' or 'l' (table a_7).
+ *   'e' (result 1): deleted when it lies in R2, or when it lies in R1 and
+ *                   r_shortv() fails just before it.
+ *   'l' (result 2): deleted when it lies in R2 and is preceded by 'l'.
+ * Returns 0 when nothing matches or a condition fails, 1 otherwise.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/* Backward whole-word test against table a_8: succeeds (1) only when an
+ * entry matches and the match reaches back to the backward limit lb.
+ */
+static int r_exception2(struct SN_env * z) {
+    int matched;
+
+    z->ket = z->c; /* [, line 147 */
+    matched = find_among_b(z, a_8, 8); /* substring, line 147 */
+    if (matched == 0) return 0;
+    z->bra = z->c; /* ], line 147 */
+    return z->c <= z->lb; /* atlimit, line 147 */
+}
+
+/* Forward whole-word lookup in table a_9.  Fails (0) when no entry
+ * matches or the match does not run to the end of the word.  Result codes
+ * 1..11 replace the word with a fixed stem; any other code (the table
+ * uses -1) leaves the word unchanged.  Returns 1 in both of those cases.
+ */
+static int r_exception1(struct SN_env * z) {
+    /* Replacement stem for result code k is stems[k - 1]. */
+    static const struct { int len; symbol * text; } stems[11] = {
+        { 3, s_36 }, { 3, s_37 }, { 3, s_38 }, { 3, s_39 },
+        { 3, s_40 }, { 3, s_41 }, { 5, s_42 }, { 4, s_43 },
+        { 5, s_44 }, { 4, s_45 }, { 5, s_46 }
+    };
+    int code;
+
+    z->bra = z->c; /* [, line 159 */
+    code = find_among(z, a_9, 18); /* substring, line 159 */
+    if (code == 0) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+
+    if (code >= 1 && code <= 11)
+        slice_from_s(z, stems[code - 1].len, stems[code - 1].text);
+    return 1;
+}
+/* Postlude: undo the marking done by r_prelude().  Returns 0 straight
+ * away when B[0] (Y_found) is unset.  Otherwise it repeatedly scans
+ * forward for 'Y', rewrites each one as 'y', and returns 1 once the end
+ * of the word is reached.
+ */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0; /* no further 'Y': stop repeating */
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+/* Stem the word currently held in z.
+ *   1. If r_exception1() succeeds, the word is done.
+ *   2. Otherwise return 0 when the word is shorter than 3 symbols (the
+ *      "hop 3" test).
+ *   3. Run r_prelude() and r_mark_regions(), then switch to backward mode
+ *      (lb = cursor, cursor = l).
+ *   4. Run Step 1a; unless r_exception2() then matches, run Steps 1b, 1c,
+ *      2, 3, 4 and 5.  The cursor is reset after each step and a step's
+ *      failure is ignored.
+ *   5. Move the cursor back to lb and run r_postlude().
+ * Returns 1 in every case except the short-word exit in step 2.
+ */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/* Create an environment for the English stemmer: no string variables, two
+ * integers (the marks kept in I[0] and I[1]) and one boolean (B[0]).
+ */
+extern struct SN_env * english_create_env(void)
+{
+    return SN_create_env(0, 2, 1);
+}
+
+/* Release an environment created by english_create_env(). */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+/* Public interface of the English stemmer: create and release its
+ * environment.
+ */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+/* Stem the word currently held in the environment. */
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>  /* INT_MAX, INT_MIN */
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Size in bytes of two ints, matching the two int slots that SIZE() and
+ * CAPACITY() read in front of a symbol string.  Parenthesised so that the
+ * macro expands safely inside larger expressions.
+ */
+#define HEAD (2*sizeof(int))
+
+/* Bookkeeping ints stored immediately before the symbols of string p.
+ * Fully parenthesised; SIZE() and CAPACITY() remain usable as lvalues.
+ */
+#define SIZE(p)        (((int *)(p))[-1])
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    (((int *)(p))[-2])
+
+/* One entry of a search table handed to find_among() / find_among_b(). */
+struct among
+{
+    int s_size;                         /* number of chars in string */
+    symbol * s;                         /* search string */
+    int substring_i;                    /* index to longest matching substring */
+    int result;                         /* result of the lookup */
+    int (* function)(struct SN_env *);  /* optional routine; 0 when unused */
+};
+/* Allocation and release of symbol strings. */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+/* Grouping tests: s is a grouping table, min/max the symbol range it
+ * covers.  The _b variants are the ones called while scanning backwards.
+ */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+/* Range tests (not used by the stemmer code visible in this file set). */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+/* String comparison at the cursor, forwards or backwards (_b). */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+/* Table search; callers treat the return value as 0 for "no match". */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+/* Editing of the current string and of the bra..ket slice. */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+/* Insertion between the positions bra and ket. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+/* NOTE(review): presumably copy the slice / whole string into p - confirm. */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+/* Debugging aid. */
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point: stems the word held in z in place. */
+extern int russian_stem(struct SN_env * z);
+/* Internal routines, one per step of the algorithm. Each returns 1 on
+   success and 0 when it does not apply. */
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Creation and disposal of the stemmer environment. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Suffix table searched backwards by r_perfective_gerund().
+   The s_0_N arrays hold the suffixes as raw symbol codes (presumably KOI8-R
+   bytes -- TODO confirm). Each a_0 entry is
+   { length, symbols, index of the longest entry that is a suffix of this one
+     (-1 if none), result code, optional condition routine (0 = none) }. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Suffix table searched backwards by r_adjective(). Every entry carries
+   result code 1 (delete the suffix) and has no shorter entry chained to it.
+   Entry layout: { length, symbols, substring index, result, routine }. */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Suffix table searched backwards by r_adjectival() after an adjective
+   ending has been removed. Result 1 entries additionally require a
+   preceding s_2 / s_3 symbol; result 2 entries are deleted unconditionally.
+   Entry layout: { length, symbols, substring index, result, routine }. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* Two-entry suffix table searched backwards by r_reflexive(); both entries
+   have result code 1 (delete the suffix). */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Suffix table searched backwards by r_verb(). Result 1 entries are deleted
+   only when preceded by the s_4 / s_5 symbol; result 2 entries are deleted
+   unconditionally.
+   Entry layout: { length, symbols, substring index, result, routine }. */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Suffix table searched backwards by r_noun(). Every entry has result
+   code 1 (delete the suffix).
+   Entry layout: { length, symbols, substring index, result, routine }. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* Two-entry suffix table searched backwards by r_derivational(); a match is
+   deleted only when it lies inside the region checked by r_R2(). */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Suffix table searched backwards by r_tidy_up(). The result code (1, 2
+   or 3) selects which clean-up branch of r_tidy_up() runs. */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Membership bitmap for the grouping tests in r_mark_regions(), which pass
+   it with the code range 192..220: bit (code - 192) is set for each member.
+   Presumably this is the vowel group of the algorithm -- TODO confirm. */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* One-symbol literals matched with eq_s_b():
+   s_0/s_1 in r_perfective_gerund, s_2/s_3 in r_adjectival, s_4/s_5 in
+   r_verb, s_6..s_8 in r_tidy_up and s_9 in russian_stem. Several hold the
+   same code because a separate array is emitted per use. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the two region marks used by the rest of the stemmer.
+ *
+ * Both marks start at the end of the word (z->l). Scanning forwards from the
+ * cursor, I[0] becomes the position just after the first g_v member, and I[1]
+ * the position reached after then passing a non-member, a member and another
+ * non-member. If the end of the word is hit first, the marks found so far are
+ * kept. The cursor is always restored; the function always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    int start;
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    start = z->c;
+
+    /* go past the first member of g_v */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    z->I[0] = z->c;
+
+    /* go past the next non-member */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    /* go past the next member */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    /* go past the next non-member */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    z->I[1] = z->c;
+
+restore:
+    z->c = start;
+    return 1;
+}
+
+/* Region test: 1 when the cursor is at or beyond the mark I[1], else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/* Remove a perfective-gerund ending found in table a_0.
+ *
+ * Result 1 endings are deleted only when directly preceded by s_0 or s_1;
+ * result 2 endings are deleted unconditionally. Returns 0 when no ending
+ * matches or the required preceding symbol is absent, 1 otherwise.
+ */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+    int mark;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_0, 9);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        mark = z->l - z->c;
+        if (!eq_s_b(z, 1, s_0)) {
+            /* first alternative failed: rewind and try the second */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z);
+    } else if (among_var == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Delete an adjective ending listed in table a_1.
+ * Returns 0 when no ending matches at the cursor, 1 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_1, 26);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1)
+        slice_del(z);
+    return 1;
+}
+
+/* Remove an adjectival ending: a mandatory adjective ending (r_adjective)
+ * optionally followed by a further ending from table a_2.
+ *
+ * Returns 0 when r_adjective() fails, 1 otherwise. The second step is a
+ * "try": when it does not apply, the cursor is put back where the try began.
+ *
+ * The try mark and the mark of the inner "or" used to share the name m, so
+ * the inner declaration shadowed the outer one and the failure path of the
+ * second alternative rewound to the wrong position. They now have distinct
+ * names (m_keep / m1).
+ */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m_keep = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m_keep; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m_keep; goto lab0; }
+            case 1:
+                {   int m1 = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m1;
+                    /* both alternatives failed: abandon the whole try */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_keep; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Delete a reflexive ending listed in table a_3.
+ * Returns 0 when neither ending matches at the cursor, 1 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_3, 2);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1)
+        slice_del(z);
+    return 1;
+}
+
+/* Remove a verb ending found in table a_4.
+ *
+ * Result 1 endings are deleted only when directly preceded by s_4 or s_5;
+ * result 2 endings are deleted unconditionally. Returns 0 when no ending
+ * matches or the required preceding symbol is absent, 1 otherwise.
+ */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+    int mark;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_4, 46);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        mark = z->l - z->c;
+        if (!eq_s_b(z, 1, s_4)) {
+            /* first alternative failed: rewind and try the second */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z);
+    } else if (among_var == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Delete a noun ending listed in table a_5.
+ * Returns 0 when no ending matches at the cursor, 1 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_5, 36);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1)
+        slice_del(z);
+    return 1;
+}
+
+/* Delete a derivational ending listed in table a_6, provided the match
+ * starts inside the region accepted by r_R2().
+ * Returns 0 when nothing matches or the region test fails, 1 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_6, 2);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (!r_R2(z)) return 0;
+
+    if (among_var == 1)
+        slice_del(z);
+    return 1;
+}
+
+/* Final clean-up driven by table a_7.
+ *
+ *   result 1: delete the ending, then require s_6 followed by s_7 before
+ *             it and delete the s_6 symbol;
+ *   result 2: require s_8 before the ending, then delete the ending;
+ *   result 3: delete the ending.
+ *
+ * Returns 0 when nothing matches or a required symbol is missing, else 1.
+ */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_7, 4);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        slice_del(z);
+        z->ket = z->c;
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c;
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z);
+    } else if (among_var == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z);
+    } else if (among_var == 3) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Stem the word held in z, in place.
+ *
+ * Steps, in order:
+ *   1. r_mark_regions() computes the marks I[0] and I[1].
+ *   2. Processing switches to backward mode (cursor at the end of the word)
+ *      and the backward limit is raised to I[0], so nothing before that mark
+ *      can be matched or removed.
+ *   3. Either a perfective gerund ending is removed, or an optional
+ *      reflexive ending followed by the first of adjectival / verb / noun
+ *      that applies.
+ *   4. A trailing s_9 symbol is removed if present.
+ *   5. r_derivational() and r_tidy_up() are applied.
+ *
+ * Returns 0 only when the cursor lies before I[0] at step 2; otherwise 1.
+ */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        /* m3 remembers the old backward limit; it is put back at the end */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            /* "do" always succeeds: the cursor goes back to the end of the word */
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3;
+    }
+    /* leave backward mode: cursor returns to the restored backward limit */
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create the environment used by russian_stem(). The stemmer reads and
+   writes z->I[0] and z->I[1]; the middle argument 2 presumably sizes that
+   array -- TODO confirm against SN_create_env(). */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Dispose of an environment obtained from russian_create_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create / dispose of the environment that russian_stem() works on. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Stem the word held in z, in place. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* "unless (C) stmt" executes stmt only when C is false. */
+#define unless(C) if(!(C))
+
+/* Capacity and initial size, in symbols, of a buffer made by create_s(). */
+#define CREATE_SIZE 1
+
+/* Allocate a fresh symbol buffer.
+ *
+ * The allocation is HEAD bytes of bookkeeping followed by CREATE_SIZE + 1
+ * symbols. The returned pointer addresses the first symbol, so the
+ * bookkeeping sits immediately before it. Capacity and size are both set
+ * to CREATE_SIZE.
+ *
+ * Returns a null pointer when malloc fails; previously the failed result
+ * was offset by HEAD and written through. Free the buffer with lose_s().
+ */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == 0) return 0;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Free a symbol buffer. p points HEAD bytes past the start of the
+ * allocation, so that offset is undone before calling free().
+ * A null p is ignored, as free() itself would; previously it resulted in an
+ * invalid pointer (null minus HEAD) being handed to free(). */
+extern void lose_s(symbol * p)
+{   if (p == 0) return;
+    free((char *) p - HEAD);
+}
+
+/* Forward group test. Succeeds when the symbol at the cursor lies in
+ * min..max and its bit (code - min) is set in the bitmap s. On success the
+ * cursor advances by one and 1 is returned; otherwise the cursor is left
+ * alone and 0 is returned (also at the forward limit z->l). */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int code;
+    int offset;
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code > max) return 0;
+    offset = code - min;
+    if (offset < 0) return 0;
+    if ((s[offset >> 3] & (0X1 << (offset & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward group test. Succeeds when the symbol just before the cursor lies
+ * in min..max and its bit (code - min) is set in the bitmap s. On success
+ * the cursor moves back by one and 1 is returned; otherwise the cursor is
+ * left alone and 0 is returned (also at the backward limit z->lb). */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int code;
+    int offset;
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code > max) return 0;
+    offset = code - min;
+    if (offset < 0) return 0;
+    if ((s[offset >> 3] & (0X1 << (offset & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward non-member test. Succeeds when the symbol at the cursor is
+ * outside min..max or its bit is clear in the bitmap s. On success the
+ * cursor advances by one and 1 is returned; otherwise the cursor is left
+ * alone and 0 is returned (also at the forward limit z->l). */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int code;
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code <= max)
+    {   int offset = code - min;
+        /* a member of the group makes the test fail */
+        if (offset >= 0 &&
+            (s[offset >> 3] & (0X1 << (offset & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward non-member test. Succeeds when the symbol just before the cursor
+ * is outside min..max or its bit is clear in the bitmap s. On success the
+ * cursor moves back by one and 1 is returned; otherwise the cursor is left
+ * alone and 0 is returned (also at the backward limit z->lb). */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int code;
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code <= max)
+    {   int offset = code - min;
+        /* a member of the group makes the test fail */
+        if (offset >= 0 &&
+            (s[offset >> 3] & (0X1 << (offset & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward range test: succeeds when the symbol at the cursor satisfies
+ * min <= code <= max. On success the cursor advances by one and 1 is
+ * returned; otherwise 0 is returned and the cursor is unchanged. */
+extern int in_range(struct SN_env * z, int min, int max)
+{   int code;
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code < min || code > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward range test: succeeds when the symbol just before the cursor
+ * satisfies min <= code <= max. On success the cursor moves back by one and
+ * 1 is returned; otherwise 0 is returned and the cursor is unchanged. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{   int code;
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code < min || code > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward out-of-range test: succeeds when the symbol at the cursor is
+ * below min or above max. On success the cursor advances by one and 1 is
+ * returned; otherwise 0 is returned and the cursor is unchanged. */
+extern int out_range(struct SN_env * z, int min, int max)
+{   int code;
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (min <= code && code <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward out-of-range test: succeeds when the symbol just before the
+ * cursor is below min or above max. On success the cursor moves back by one
+ * and 1 is returned; otherwise 0 is returned and the cursor is unchanged. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{   int code;
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (min <= code && code <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Match the s_size symbols at s against the text starting at the cursor.
+ * On a match the cursor advances past it and 1 is returned; otherwise 0 is
+ * returned and the cursor is unchanged. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{   int remaining = z->l - z->c;
+    if (remaining < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Match the s_size symbols at s against the text ending at the cursor.
+ * On a match the cursor moves back to the start of it and 1 is returned;
+ * otherwise 0 is returned and the cursor is unchanged. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{   int available = z->c - z->lb;
+    if (available < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward match against the whole symbol buffer p (length from SIZE(p)). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{   int len = SIZE(p);
+    return eq_s(z, len, p);
+}
+
+/* Backward match against the whole symbol buffer p (length from SIZE(p)). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{   int len = SIZE(p);
+    return eq_s_b(z, len, p);
+}
+
+/* Find the longest entry of table v (v_size entries) that matches the text
+ * starting at the cursor.
+ *
+ * Phase 1 is a binary search over v, which therefore has to be ordered by
+ * its strings. common_i / common_j record how many leading symbols are
+ * already known to match at the lower / upper bound, so each probe resumes
+ * comparing from the smaller of the two.
+ *
+ * Phase 2 starts at the entry the search settled on and follows the
+ * substring_i links to ever shorter candidates until one is fully matched
+ * and, if it has a condition routine, that routine succeeds.
+ *
+ * Returns the result code of the accepted entry with the cursor moved just
+ * past it, or 0 when no entry is accepted.
+ */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        /* this inner i is a separate index into w->s; it shadows the outer i */
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the routine may have moved the cursor; put it back */
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing: it finds the longest entry of
+ * table v (v_size entries) that matches the text ending at the cursor.
+ *
+ * Strings are compared from their last symbol towards their first, and the
+ * comparison stops at the backward limit z->lb. The binary search and the
+ * walk along the substring_i links work as in find_among().
+ *
+ * Returns the result code of the accepted entry with the cursor moved to
+ * the start of the match, or 0 when no entry is accepted.
+ */
+
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        /* this inner i is a separate index into w->s; it shadows the outer i */
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            /* go round once more so that the first entry gets inspected */
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the routine may have moved the cursor; put it back */
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Grow the symbol buffer p so that it can hold at least n symbols (the new
+ * capacity is n + 20) and return the resized buffer. p must not be used
+ * after the call.
+ *
+ * The block is resized with realloc(), which keeps the existing symbols and
+ * the size field stored in the HEAD bytes; only the capacity is rewritten.
+ * The previous malloc + memmove version left the size field of the new
+ * buffer uninitialised, did not check for allocation failure, and copied
+ * CAPACITY(p) symbols even if that exceeded the new allocation.
+ *
+ * Returns a null pointer when memory cannot be obtained; in that case the
+ * old buffer has been freed.
+ */
+extern symbol * increase_size(symbol * p, int n)
+{   int new_size = n + 20;
+    symbol * q;
+    void * mem = realloc((char *) p - HEAD, HEAD + (new_size + 1) * sizeof(symbol));
+    if (mem == 0)
+    {   /* realloc leaves the old block allocated on failure */
+        lose_s(p);
+        return 0;
+    }
+    q = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(q) = new_size;
+    return q;
+}
+
+/* Replace the symbols of z->p between positions c_bra and c_ket with the
+ * s_size symbols at s.
+ *
+ * When the length changes, the buffer is grown if necessary, the tail after
+ * c_ket is shifted, and the stored size and z->l are adjusted. A cursor at
+ * or after c_ket moves with the tail; a cursor strictly inside the replaced
+ * region is moved to c_bra.
+ *
+ * Returns the change in length (new length minus old length).
+ */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{   const int delta = s_size - (c_ket - c_bra);
+    if (delta != 0)
+    {   const int old_len = SIZE(z->p);
+        const int new_len = delta + old_len;
+        if (new_len > CAPACITY(z->p))
+            z->p = increase_size(z->p, new_len);
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        if (z->c >= c_ket)
+            z->c += delta;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0)
+        memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/* Sanity check before a slice operation: requires
+ * 0 <= bra <= ket <= l <= SIZE(p). On violation a message goes to stderr,
+ * the state is dumped with debug(), and the process exits with status 1. */
+static void slice_check(struct SN_env * z)
+{
+    int broken = z->bra < 0 ||
+                 z->bra > z->ket ||
+                 z->ket > z->l ||
+                 z->l > SIZE(z->p);   /* this last test could be removed */
+    if (!broken) return;
+
+    fprintf(stderr, "faulty slice operation:\n");
+    debug(z, -1, 0);
+    exit(1);
+}
+
+/* Replace the current slice (z->bra .. z->ket) with the s_size symbols at
+ * s, after validating the slice bounds. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice with the contents of the symbol buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{   int len = SIZE(p);
+    slice_from_s(z, len, p);
+}
+
+/* Delete the current slice (z->bra .. z->ket): the bounds are validated and
+ * the slice is replaced by an empty string. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_check(z);
+    replace_s(z, z->bra, z->ket, 0, 0);
+}
+
+/* Replace the region bra .. ket with the s_size symbols at s, then shift
+ * z->bra and z->ket by the change in length if they lie at or after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{   int shift = replace_s(z, bra, ket, s_size, s);
+    if (z->bra >= bra) z->bra += shift;
+    if (z->ket >= bra) z->ket += shift;
+}
+
+/* Same as insert_s(), taking the replacement text from the symbol buffer p
+ * (length from SIZE(p)). */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice (z->bra .. z->ket) into p and set p's size to the
+ * slice length. p is grown when too small, so callers must keep the
+ * returned pointer rather than the one passed in. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{   int len;
+    slice_check(z);
+    len = z->ket - z->bra;
+    if (len > CAPACITY(p))
+        p = increase_size(p, len);
+    memmove(p, z->p + z->bra, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into p and set p's size to z->l.
+ * p is grown when too small, so callers must keep the returned pointer
+ * rather than the one passed in. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{   const int len = z->l;
+    if (len > CAPACITY(p))
+        p = increase_size(p, len);
+    memmove(p, z->p, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Dump the working string to stdout with position markers:
+ *   {  backward limit (lb)     [  slice start (bra)    |  cursor (c)
+ *   ]  slice end (ket)         }  forward limit (l)
+ * Zero symbols are shown as '#'. When number is non-negative the line is
+ * prefixed with number, line_count and the buffer size. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{   int limit = SIZE(z->p);
+    int pos = 0;
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+    while (pos <= limit)
+    {   if (z->lb == pos) printf("{");
+        if (z->bra == pos) printf("[");
+        if (z->c == pos) printf("|");
+        if (z->ket == pos) printf("]");
+        if (z->l == pos) printf("}");
+        if (pos < limit)
+        {   int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+        pos++;
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case a NUL-terminated string in place, byte by byte, using
+ * tolower().  Returns the same pointer it was given.
+ */
+char*
+lowerstr(char *str) {
+   char *cur;
+
+   for(cur=str; *cur!='\0'; cur++)
+       *cur = tolower(*(unsigned char*)cur);
+   return str;
+}
+
+/*
+ * Release everything owned by a stop list and reset the structure to zero.
+ *
+ * s->stop is an array of s->len strings; both the strings and the array
+ * come from malloc()/strdup()/realloc() in readstoplist(), so free() is used
+ * here.  The array has no NULL terminator, so the walk is bounded by s->len
+ * only.  The array itself is freed exactly once, after its elements (the
+ * previous code freed it inside the loop, which read freed memory on the
+ * next iteration and freed the array repeatedly).
+ */
+void
+freestoplist(StopList *s) {
+   if ( s->stop ) {
+       int i;
+
+       /* free(NULL) is a no-op, so empty slots need no special casing */
+       for(i=0; i<s->len; i++)
+           free(s->stop[i]);
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Load a stop-word list from the file named by "in" into *s.
+ *
+ * The file holds one word per line; empty lines are skipped.  Every word is
+ * strdup()ed and, when s->wordop is set, passed through it.  On return
+ * s->len is the number of words and s->stop the malloc()ed array holding
+ * them (NULL when "in" is NULL/empty or the file contains no words).  The
+ * list is not sorted here.
+ *
+ * Raises elog(ERROR) when the file cannot be opened or memory runs out; in
+ * the out-of-memory case everything read so far is released first.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+       int oom=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t buflen=strlen(buf);
+
+           /*
+            * Drop the trailing newline only when there is one: the last
+            * line of a file may lack it, and an over-long line is returned
+            * by fgets() in pieces without it.
+            */
+           if ( buflen>0 && buf[buflen-1]=='\n' )
+               buf[--buflen] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   oom=1;
+                   break;
+               }
+               stop=tmp;
+           }
+    
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               oom=1;
+               break;
+           }
+           if ( s->wordop ) 
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++; 
+       }
+       fclose(hin);
+
+       if ( oom ) {
+           /*
+            * The words read so far live only in the local array (s->stop
+            * has not been assigned yet), so release them here before
+            * resetting *s and reporting the error.
+            */
+           while( s->len>0 )
+               free(stop[--(s->len)]);
+           free(stop);
+           freestoplist(s);
+           elog(ERROR,"Not enough memory");
+       }
+       pfree(filename); 
+   }
+   s->stop=stop;
+} 
+
+/*
+ * qsort()/bsearch() comparator for arrays of char*: a and b point at the
+ * array elements, and the strings they refer to are compared with strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   char *left = *(char**)a;
+   char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/*
+ * Sort the words of a stop list with strcmp() ordering.  Does nothing for
+ * an empty list.
+ */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Report whether key is in the stop list.  When s->wordop is set the key is
+ * passed through it first.  The lookup is a bsearch() with strcmp()
+ * ordering, so the list must already be sorted that way (see sortstoplist).
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop ) 
+       key=(*(s->wordop))(key);
+   if ( !s->stop || s->len<=0 )
+       return false;
+   return ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) != NULL ) ? true : false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <locale.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Load the tsearch configuration whose pg_ts_cfg row has OID "id" into *cfg.
+ *
+ * Two saved SPI plans are used: the first fetches the configuration's
+ * parser name (prs_name); the second fetches, for each token type of that
+ * parser, the array of dictionary names mapped to it, highest token id
+ * first.  The names are copied into TopMemoryContext while SPI is connected
+ * and are turned into parser/dictionary ids (name2id_prs / name2id_dict)
+ * only after SPI_finish().
+ *
+ * On return cfg->map has cfg->len entries indexed by token type id, each
+ * holding the ids of the dictionaries listed for that token type.  cfg->map
+ * and the dict_id arrays are malloc()ed; reset_cfg() frees them.
+ *
+ * Problems are reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       /* the copy must outlive SPI_finish(), hence TopMemoryContext */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second plan: $1 is the parser name (text), $2 stays the cfg oid */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /*
+        * Rows arrive ordered by tokid descending, so the first row carries
+        * the largest token id and fixes the size of the map.
+        */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull now refers to the dict_name column read last */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* keep copies of the dictionary names until after SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       /* free the detoasted copy, if detoasting made one */
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every saved dictionary name by the id it resolves to */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Process-wide cache of loaded configurations (see findcfg / reset_cfg).
+ */
+typedef struct {
+   /* entry returned by the most recent findcfg() lookup, or NULL */
+   TSCfgInfo   *last_cfg;
+   /* number of entries of list in use */
+   int     len;
+   /* number of entries list has room for */
+   int     reallen;
+   /* realloc()ed array of configurations, kept sorted by id with comparecfg */
+   TSCfgInfo   *list;
+   /* configuration name -> id cache used by name2id_cfg */
+   SNMap       name2id_map;
+} CFGList;
+
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name->id map, every
+ * configuration's per-token dictionary arrays and token map, and the list
+ * itself.  CList is zeroed afterwards so the next findcfg() starts empty.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               /* map has CList.list[i].len entries, one per token type */
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is an
+ * unsigned type, so the difference of two ids does not reliably keep its
+ * sign once converted to int.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((TSCfgInfo*)a)->id;
+   Oid idb = ((TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached configuration with the given id, loading it with
+ * init_cfg() on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search of the
+ * sorted cache, then a fresh load appended to the cache (which is re-sorted
+ * afterwards).  The returned pointer refers into CList.list and can be
+ * invalidated by a later findcfg() call that grows or re-sorts the list.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+
+   /*
+    * Do not publish the new slot through last_cfg while it is being
+    * filled: init_cfg() stores the id early and may still raise an error
+    * afterwards, which would leave last_cfg matching a half-built entry
+    * that the fast path above would hand out on the next call.
+    */
+   CList.last_cfg=NULL;
+   init_cfg(id, &(CList.list[CList.len]));
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return findcfg(id); /* qsort changed order!! */
+}
+
+
+/*
+ * Translate a configuration name into the oid of its pg_ts_cfg row.
+ *
+ * Names already seen are answered from CList.name2id_map; otherwise the
+ * table is queried through a saved SPI plan and the result is remembered.
+ * Raises elog(ERROR) when no row matches or the id is NULL.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid id;
+
+   id=findSNMap_t( &(CList.name2id_map), name );
+   if ( id )
+       return id;
+
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( !plan_name2id )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed <= 0 )
+       elog(ERROR, "No tsearch config");
+   else {
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull )
+           elog(ERROR, "Null id for tsearch config");
+   }
+   SPI_finish();
+
+   /* remember the answer for later calls */
+   addSNMap_t( &(CList.name2id_map), name, id );
+   return id;
+}
+
+
+/*
+ * Split buf (buflen bytes) into tokens with the parser of cfg, normalize
+ * each token with the dictionaries mapped to its type, and append the
+ * resulting lexemes to prs->words.
+ *
+ * For every token the dictionaries are tried in order; the first one that
+ * returns a non-NULL result decides (an empty result means the token is
+ * dropped, e.g. as a stop word) and the position counter prs->pos is
+ * advanced.  Token types outside cfg's map are skipped.  The lexeme strings
+ * returned by the dictionary are stored in prs->words without copying; only
+ * the array that carried them is freed.
+ *
+ * Raises elog(ERROR) for a token of MAXSTRLEN bytes or more.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an integer, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           /* norms is a NULL-terminated array of lexeme strings */
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one raw token (buflen bytes at buf, token type "type") to the
+ * headline word array, doubling the array as needed.  The token text is
+ * copied into freshly palloc()ed memory; all other fields start as zero.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* take the slot address only after any reallocation */
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;
+}
+
+/*
+ * Attach to the most recently added headline word every query item whose
+ * operand equals the normalized lexeme buf (buflen bytes).
+ *
+ * The first matching item is stored in the word itself.  For each further
+ * match a copy of the word is appended with its own item and repeated=1;
+ * the copies share the original's word pointer.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* make room for the worst case: one extra entry per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc() above may have moved the array,
+    * which would leave a pointer obtained earlier dangling.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Tokenize buf (buflen bytes) for headline generation.
+ *
+ * Every token produced by cfg's parser is appended to prs->words in its
+ * original form (hladdword).  The token is then normalized with the
+ * dictionaries mapped to its type; the first dictionary that returns a
+ * non-NULL result decides, and each lexeme it returns is matched against
+ * the query items (hlfinditem).  The lexeme strings and the array carrying
+ * them are freed here.
+ *
+ * Raises elog(ERROR) for a token of MAXSTRLEN bytes or more.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       /* token types outside the map have no dictionaries */
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an integer, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           /* norms is a NULL-terminated array of lexeme strings */
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Assemble the headline text from the marked-up word array.
+ *
+ * A word is emitted only when its "in" flag is set and it is neither
+ * skipped nor a repeated copy.  Words flagged "replace" are emitted as a
+ * single space; words flagged "selected" are wrapped in prs->startsel /
+ * prs->stopsel.  The text of every non-repeated word is pfree()d on the way
+ * (repeated copies share it with their original).  Returns a palloc()ed
+ * text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int cap=128;
+   text *out=(text*)palloc( cap );
+   char *cur=((char*)out) + VARHDRSZ;
+   int idx;
+
+   for(idx=0; idx<prs->curwords; idx++) {
+       HLWORD  *wrd=prs->words + idx;
+
+       /* make sure the word plus both selection markers fits */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (cur - ((char*)out)) >= cap ) {
+           int used = cur - ((char*)out);
+
+           cap*= 2;
+           out = (text *) repalloc(out, cap);
+           cur=((char*)out) + used;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace )
+               *cur++ = ' ';
+           else {
+               if (wrd->selected) {
+                   memcpy(cur,prs->startsel,prs->startsellen);
+                   cur+=prs->startsellen;
+               }
+               memcpy(cur,wrd->word,wrd->len);
+               cur+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(cur,prs->stopsel,prs->stopsellen);
+                   cur+=prs->stopsellen;
+               }
+           }
+       }
+
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+   }
+
+   VARATT_SIZEP(out)=cur - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the id of the current configuration.
+ *
+ * When one has already been chosen (current_cfg_id > 0) it is returned
+ * directly.  Otherwise pg_ts_cfg is searched for a row whose locale equals
+ * the process's LC_CTYPE locale, and the result is remembered.  Raises
+ * elog(ERROR) when no such row exists.
+ */
+int
+get_currcfg(void) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1];
+   const char *locname;
+   bool isnull;
+   int rc;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* setlocale() with a NULL locale only queries the current setting */
+   locname = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)locname) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed <= 0 )
+       elog(ERROR,"Can't find tsearch config by locale");
+   else
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current one.
+ * findcfg() is called first, so a configuration that cannot be loaded
+ * raises its error before current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current
+ * one.  The name is resolved with name2id_cfg() and the work is delegated
+ * to set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Oid cfgid=name2id_cfg(name);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum(cfgid) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* SQL-callable: return the id of the current configuration (get_currcfg). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=get_currcfg();
+
+   PG_RETURN_OID( cfgid );
+}
+
+/*
+ * SQL-callable: emits the notice "TSearch cache cleaned".
+ *
+ * NOTE(review): nothing in this function clears a cache itself; presumably
+ * ts_error() (defined elsewhere) resets the caches before reporting --
+ * confirm against its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries mapped to one token type of a configuration. */
+typedef struct {
+   /* number of entries in dict_id */
+   int len;
+   /* dictionary ids stored as Datums, in the order they are tried */
+   Datum   *dict_id;
+} ListDictionary;
+
+/* One loaded tsearch configuration (filled in by init_cfg). */
+typedef struct {
+   /* oid of the configuration's pg_ts_cfg row */
+   Oid id;
+   /* id of the parser used by this configuration */
+   Oid prs_id;
+   /* number of entries in map */
+   int len;
+   /* dictionary lists, indexed by token type id */
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalized lexeme produced by parsetext_v2. */
+typedef struct {
+   /* length of word in bytes */
+        uint16          len;
+   /* a single position, or a pointer to positions (which one is in use is decided by the code using the struct) */
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+   /* lexeme text */
+        char       *word;
+   /* set to 0 by parsetext_v2; presumably the allocated size of pos.apos -- TODO confirm */
+   uint32  alen;
+}       WORD;
+   
+/* Growable array of lexemes collected while parsing a text. */
+typedef struct {
+   /* the lexemes */
+        WORD       *words;
+   /* number of entries words has room for */
+        int4            lenwords;
+   /* number of entries of words in use */
+        int4            curwords;
+   /* running position counter, advanced once per recognized token */
+   int4        pos;
+}       PRSTEXT;
+
+/* One token of a text being turned into a headline. */
+typedef struct {
+   /* length of word in bytes */
+        uint16    len;
+   /*
+    * Flags read by genhl():
+    *   selected - wrap the word in the start/stop selection markers
+    *   in       - the word is part of the headline
+    *   skip     - do not emit the word
+    *   replace  - emit a single space instead of the word
+    *   repeated - extra copy made by hlfinditem for a further matching
+    *              query item; shares word with its original
+    */
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   /* token type reported by the parser */
+   uint8   type;
+   /* token text as it appeared in the input */
+        char      *word;
+   /* matching query item, or NULL when the token matches none */
+   ITEM      *item;
+}       HLWORD;
+   
+/* Growable array of headline tokens plus the selection markers. */
+typedef struct {
+   /* the tokens */
+        HLWORD       *words;
+   /* number of entries words has room for */
+        int4            lenwords;
+   /* number of entries of words in use */
+        int4            curwords;
+   /* text emitted before a selected word, startsellen bytes */
+        char           *startsel;
+   /* text emitted after a selected word, stopsellen bytes */
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+/*
+ * tsstat_in - input function for tsstat.
+ * The argument is never examined: the result is always a freshly
+ * palloc'ed, header-only value (len = STATHDRSIZE, size = 0).
+ */
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum tsstat_in(PG_FUNCTION_ARGS);
+
+Datum
+tsstat_in(PG_FUNCTION_ARGS)
+{
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+   PG_RETURN_POINTER(empty);
+}
+
+/*
+ * tsstat_out - output function for tsstat.
+ * Not implemented: always raises an ERROR. The trailing return is only
+ * reached if elog(ERROR) were to return.
+ */
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum tsstat_out(PG_FUNCTION_ARGS);
+
+Datum
+tsstat_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * SEI_realloc - grow an array of WordEntry pointers.
+ *
+ * in   current array, or NULL when nothing has been allocated yet
+ * len  in/out capacity in elements; set to 8 on first allocation,
+ *      doubled on every later call
+ *
+ * Returns the (possibly moved) array.
+ */
+static WordEntry **
+SEI_realloc(WordEntry **in, uint32 *len)
+{
+   if (in != NULL && *len != 0) {
+       /* already allocated: double the capacity */
+       *len *= 2;
+       return repalloc(in, sizeof(WordEntry *) * (*len));
+   }
+
+   /* first allocation */
+   *len = 8;
+   return palloc(sizeof(WordEntry *) * (*len));
+}
+
+/*
+ * compareStatWord - order a stat entry against a tsvector entry.
+ *
+ * Shorter words sort first; words of equal length are compared with
+ * strncmp() over their bytes. a's text lives in stat's string area,
+ * b's text in txt's string area.
+ *
+ * Returns <0, 0 or >0.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt)
+{
+   const char *statword;
+   const char *txtword;
+
+   if (a->len != b->len)
+       return (a->len > b->len) ? 1 : -1;
+
+   statword = STATSTRPTR(stat) + a->pos;
+   txtword = STRPTR(txt) + b->pos;
+   return strncmp(statword, txtword, a->len);
+}
+
+/*
+ * formstat - build a new tsstat containing every entry of stat plus the
+ * len new words listed in entry[] (pointers into txt).
+ *
+ * stat   existing statistics; not modified
+ * txt    tsvector the new words come from
+ * entry  array of len WordEntry pointers that are absent from stat
+ * len    number of elements in entry; callers pass len >= 1
+ *
+ * The result is palloc'ed. The existing string area is copied verbatim and
+ * the new lexemes are appended behind it, so pos values of old entries
+ * stay valid. The code relies on both stat and entry[] being ordered as
+ * compareStatWord() orders them (binary search / linear merge below).
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* total length of the lexemes that are about to be added */
+   while( ptr-entry < len ) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings first; new strings are appended at curptr */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary-search its slot, copy the two halves around it */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       /* a word without position data still counts as one occurrence */
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: linear merge of the old entries and entry[] */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry < len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries (possibly nothing) */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* whatever is left of the new words; at most one of the two tails is non-empty */
+       while( ptr-entry < len ) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+/*
+ * ts_accum - fold one tsvector (argument 1) into a tsstat (argument 0).
+ *
+ * Words already present in stat get ndoc incremented and nentry increased
+ * by their number of positions (at least 1); this is done IN PLACE on the
+ * input stat. Words that are not present are collected and merged in by
+ * formstat(), in which case a newly allocated tsstat is returned.
+ *
+ * A NULL state is treated as an empty one; a NULL or empty tsvector
+ * leaves the state unchanged.
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt;
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* test for NULL before detoasting: detoasting a NULL datum would dereference it */
+   if ( PG_ARGISNULL(1) )
+       PG_RETURN_POINTER(stat);
+
+   txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+
+   /* nothing to add from an empty vector */
+   if ( txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   if ( stat->size < 100*txt->size ) { /* merge */
+       /* both lists are walked in compareStatWord() order */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word is missing from stat: remember it for formstat() */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* remaining words sort after every stat entry, so all are new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       /* stat is much larger than txt: binary-search each word instead */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* the loop exits with StopLow < StopHigh only via the break above */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* stat is not freed here; ts_stat_sql() frees the old state itself */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions in this file:
+ * a private copy of the statistics and the index of the next entry
+ * to be returned.
+ */
+typedef struct {
+   uint32  cur;    /* index of the next StatEntry to emit */
+   tsstat *stat;   /* copy made by ts_setup_firstcall(); it is a tsstat, not a tsvector */
+} StatStorage;
+
+/*
+ * ts_setup_firstcall - prepare a set-returning call that walks stat.
+ *
+ * Everything that has to outlive the first call (the StatStorage, a copy
+ * of stat, the tuple slot and the attribute metadata for the "statinfo"
+ * row type) is allocated in the multi-call memory context. The caller's
+ * memory context is restored before returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext *funcctx, tsstat *stat)
+{
+   MemoryContext   callerctx;
+   StatStorage    *state;
+   TupleDesc       rowdesc;
+
+   callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* private copy of the statistics plus an iteration cursor */
+   state = palloc(sizeof(StatStorage));
+   state->stat = palloc(stat->len);
+   memcpy(state->stat, stat, stat->len);
+   state->cur = 0;
+   funcctx->user_fctx = (void *) state;
+
+   /* result rows are built as "statinfo" tuples */
+   rowdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(callerctx);
+}
+
+
+/*
+ * ts_process_call - produce the next result row of a stat scan.
+ *
+ * Returns a tuple Datum (word, ndoc, nentry) for the entry at the
+ * current cursor and advances the cursor. Once every entry has been
+ * returned, frees the stored state and returns (Datum) 0.
+ */
+static Datum
+ts_process_call(FuncCallContext *funcctx)
+{
+   StatStorage *state = (StatStorage *) funcctx->user_fctx;
+   StatEntry   *item;
+   HeapTuple    row;
+   Datum        out;
+   char        *cols[3];
+   char         ndocbuf[16];
+   char         nentrybuf[16];
+
+   if ( !(state->cur < state->stat->size) ) {
+       /* scan finished: release what ts_setup_firstcall() allocated */
+       pfree(state->stat);
+       pfree(state);
+       return (Datum) 0;
+   }
+
+   item = STATPTR(state->stat) + state->cur;
+
+   /* column 0: the word, copied out so that it is NUL-terminated */
+   cols[0] = palloc(item->len + 1);
+   memcpy(cols[0], STATSTRPTR(state->stat) + item->pos, item->len);
+   (cols[0])[item->len] = '\0';
+
+   /* columns 1 and 2: the counters rendered as text */
+   sprintf(ndocbuf, "%d", item->ndoc);
+   cols[1] = ndocbuf;
+   sprintf(nentrybuf, "%d", item->nentry);
+   cols[2] = nentrybuf;
+
+   row = BuildTupleFromCStrings(funcctx->attinmeta, cols);
+   out = TupleGetDatum(funcctx->slot, row);
+
+   pfree(cols[0]);
+   state->cur++;
+   return out;
+}
+
+/*
+ * ts_accum_finish - set-returning function over a tsstat argument.
+ * The first call copies the argument into multi-call storage; each call
+ * then returns one row until ts_process_call() reports the end.
+ */
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum ts_accum_finish(PG_FUNCTION_ARGS);
+
+Datum
+ts_accum_finish(PG_FUNCTION_ARGS)
+{
+   FuncCallContext *funcctx;
+   Datum            row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *input = (tsstat *) PG_GETARG_POINTER(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, input);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(funcctx);
+   if (row != (Datum) 0)
+       SRF_RETURN_NEXT(funcctx, row);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Cached oid of the tsvector type; InvalidOid until get_ti_Oid() has run. */
+static Oid tiOid=InvalidOid;
+
+/*
+ * get_ti_Oid - look up the oid of type "tsvector" through SPI and store
+ * it in tiOid. Must be called inside an SPI connection. Raises an ERROR
+ * if the query fails, finds no row, or yields InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /* test for "no row" with < 1: a < 0 test can never fire on an unsigned row count */
+   if ( SPI_processed < 1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * ts_stat_sql - run the query given in txt and accumulate statistics
+ * over its result.
+ *
+ * The query must return exactly one column of type tsvector; anything
+ * else raises an ERROR. Rows are fetched through a cursor in batches of
+ * 100 and every non-NULL value is folded into the state with ts_accum().
+ * Must be called inside an SPI connection.
+ *
+ * Returns the accumulated tsstat (header-only when no words were seen).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   /* first batch; its tuple descriptor is used to validate the result shape */
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for( i=0; i < SPI_processed; i++ ) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum() returns the same pointer when no new word was added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+/*
+ * ts_stat - set-returning function: word statistics for the tsvector
+ * column produced by the query text in argument 0.
+ *
+ * On the first call the query is run through SPI, the statistics are
+ * collected by ts_stat_sql() and copied into multi-call storage. Every
+ * call then returns one row until ts_process_call() reports the end.
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum ts_stat(PG_FUNCTION_ARGS);
+
+Datum
+ts_stat(PG_FUNCTION_ARGS)
+{
+   FuncCallContext *funcctx;
+   Datum            row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text   *txt = PG_GETARG_TEXT_P(0);
+       tsstat *collected;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+
+       SPI_connect();
+       collected = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt, 0);
+       /* copy the result out before SPI_finish() ends the SPI connection */
+       ts_setup_firstcall(funcctx, collected);
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(funcctx);
+   if (row != (Datum) 0)
+       SRF_RETURN_NEXT(funcctx, row);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/* Statistics for one word, as filled in by formstat() and updated by ts_accum(). */
+typedef struct {
+   uint32  len;    /* length of the word in bytes */
+   uint32  pos;    /* offset of the word from STATSTRPTR() of the owning tsstat */
+   uint32  ndoc;   /* incremented once per accumulated tsvector containing the word */
+   uint32  nentry; /* sum of position counts; a word without positions adds 1 */
+}  StatEntry;
+
+/*
+ * Accumulated statistics: a header, then size StatEntry items, then the
+ * concatenated word bytes (see STATPTR / STATSTRPTR below).
+ */
+typedef struct {
+   int4        len;    /* total size of the value in bytes, header included */
+   int4        size;   /* number of StatEntry items */
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat. Arguments are parenthesized so that
+ * expressions can be passed safely, and the casts use tsstat (declared
+ * above) so the macros need no other header.
+ */
+/* size of the two int4 header fields (len, size) */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* total bytes needed for x entries plus lenstr bytes of word data */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* first StatEntry of a tsstat */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* start of the word data, directly behind the entry array */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* number of bytes of word data */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, plus an array giving each lexeme's position in the string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include              /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator: orders WordEntryPos items by ascending position.
+ * Weights are not compared; uniquePos() merges items with equal positions.
+ */
+static int 
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   /*
+    * Equal positions must compare as 0: returning a non-zero value for both
+    * (a,b) and (b,a) gives qsort() an inconsistent ordering.
+    */
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort a position array and squeeze out duplicate positions in place.
+ *
+ * a - array of l positions; on return the unique positions sit at the front
+ * l - number of items in a
+ *
+ * When two items share a position the larger weight is kept.  Compaction
+ * stops early once MAXNUMPOS items have been kept or the position
+ * MAXENTRYPOS-1 has been stored.  Returns the number of items kept.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *last = a;     /* last item kept so far */
+   int4 i;
+
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (i = 1; i < l; i++) {
+       WordEntryPos *cur = a + i;
+
+       if ( cur->pos == last->pos ) {
+           /* same position: only promote the weight */
+           if ( cur->weight > last->weight )
+               last->weight = cur->weight;
+           continue;
+       }
+
+       last++;
+       last->pos = cur->pos;
+       last->weight = cur->weight;
+       if ( last-a >= MAXNUMPOS-1 || last->pos == MAXENTRYPOS-1 )
+           break;
+   }
+   return last + 1 - a;
+}
+
+/* Lexeme buffer that compareentry() indexes into; uniqueentry() sets it before sorting. */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN: shorter lexemes sort first, lexemes of
+ * equal length are ordered by strncmp() over their bytes in BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *lhs = (const WordEntryIN *) a;
+   const WordEntryIN *rhs = (const WordEntryIN *) b;
+
+   if ( lhs->entry.len != rhs->entry.len )
+       return ( lhs->entry.len > rhs->entry.len ) ? 1 : -1;
+
+   return strncmp(BufferStr + lhs->entry.pos,
+                  BufferStr + rhs->entry.pos,
+                  lhs->entry.len);
+}
+
+/*
+ * Sort the parsed entries, merge duplicate lexemes (concatenating and then
+ * de-duplicating their position lists) and compute how many data bytes the
+ * final tsvector needs.
+ *
+ * a         - array of l parsed entries; compacted in place
+ * l         - number of entries in a (the caller only calls this with l > 0)
+ * buf       - buffer the entry.pos offsets refer to
+ * outbuflen - out: for every kept entry, SHORTALIGN(len) bytes of lexeme plus,
+ *             when it has positions, (count + 1) WordEntryPos-sized slots
+ *             (the extra slot holds the uint16 count)
+ *
+ * Returns the number of distinct entries left at the front of a.
+ *
+ * The size is computed from zero and includes the count slot of every
+ * position list, so it matches exactly what tsvector_in() copies; the
+ * previous accounting left the count slot out for multi-entry input and
+ * relied on the caller's scratch-buffer size as slack.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   res = a;
+   *outbuflen = 0;
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen += (*(uint16*)(a->pos) + 1) * sizeof(WordEntryPos);
+       }
+       *outbuflen += SHORTALIGN(a->entry.len);
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* a new lexeme starts: finish the one collected so far */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* duplicate lexeme carrying positions: fold them into res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* res had no positions yet: adopt the duplicate's list */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* account for the last collected lexeme */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/*
+ * States of the gettoken_tsvector() state machine:
+ *   WAITWORD     - skipping blanks, waiting for the first character of a word
+ *   WAITENDWORD  - inside an unquoted word
+ *   WAITNEXTCHAR - previous character was a backslash; take the next one literally
+ *   WAITENDCMPLX - inside a single-quoted word
+ *   WAITPOSINFO  - quoted word closed; a ':' here starts position info
+ *   INPOSINFO    - expecting the digits of a position
+ *   WAITPOSDELIM - after a position: skips its remaining digits, accepts a
+ *                  weight letter, ',' or the end of the token
+ */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Doubles state->len and repallocs state->word once the current write
+ * offset plus one reaches state->len; state->curpos is re-pointed at the
+ * same offset in the new buffer.  Expects a variable named `state`
+ * (pointer to TI_IN_STATE) in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Read the next lexeme (and its optional position list) from state->prsbuf.
+ *
+ * The lexeme is written NUL-terminated into state->word (state->curpos ends
+ * up pointing at the terminator).  When a ":pos[weight],..." suffix is parsed,
+ * state->pos is allocated, slot 0 holds the uint16 count and state->alen is
+ * non-zero; otherwise state->alen stays 0.  When state->oprisdelim is set,
+ * operator characters end a word and position info is not parsed.
+ *
+ * Returns 1 when a lexeme was read, 0 at end of input; syntax problems are
+ * reported through elog(ERROR).
+ *
+ * Characters are cast to unsigned char before being handed to the <ctype.h>
+ * classifiers: passing a negative plain char (any byte >= 0x80 where char
+ * is signed) is undefined behaviour.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           /* character after a backslash is taken literally */
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               /* delimiter is left unconsumed for the next call */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* consume the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               /* slot 0 of state->pos holds the count; make room for one more item */
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               /* remaining digits of the number atoi() already read are skipped */
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * tsvector_in - input function: parse the text form of a tsvector.
+ *
+ * Tokens are collected with gettoken_tsvector() into a scratch buffer,
+ * de-duplicated by uniqueentry(), and then packed into a freshly palloc'd
+ * tsvector (entry array followed by lexemes and position lists).
+ *
+ * Raises ERROR when a word reaches MAXSTRLEN bytes or when an offset does
+ * not fit the 20-bit WordEntry.pos field.  Offsets are compared with
+ * ">= MAXSTRPOS" because the largest storable value is MAXSTRPOS-1, and the
+ * offsets actually stored in the result are checked as well.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array and the scratch buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* empty vector: no data area, as in to_tsvector() */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /* the stored offset also covers alignment padding and position lists */
+       if (cur - STRPTR(in) >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* count slot plus the positions themselves */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * tsvector_length - SQL-callable: returns the size field of a tsvector,
+ * i.e. the number of entries it stores.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before a detoasted copy may be released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * tsvector_out - output function: render a tsvector as text.
+ *
+ * Every lexeme is written in single quotes, separated by blanks, followed by
+ * ":pos[weight],..." when it carries positions (weights 3/2/1 print as
+ * A/B/C, weight 0 prints nothing).
+ *
+ * Both the quote and the backslash are escaped with a backslash, because
+ * gettoken_tsvector() treats a backslash as "take the next character
+ * literally"; without escaping it a lexeme containing a backslash would not
+ * read back unchanged.  The buffer is sized up front for the worst case
+ * (every lexeme byte escaped), so no reallocation is needed while writing.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += ptr[i].len*2 /*for escape */;
+       if ( ptr[i].haspos )
+           /* up to 5 digits + weight letter + separator per position */
+           lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'' || *curin == '\\')
+               *curout++ = '\\';
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD: by length first, then by bytes, and for the
+ * same lexeme by ascending position.
+ *
+ * Two items with the same lexeme and the same position compare as 0; the
+ * comparator used to return -1 in both directions for such a pair, which is
+ * not a consistent ordering for qsort().
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+
+   if (wa->len == wb->len) {
+       int res = strncmp(wa->word, wb->word, wb->len);
+
+       if ( res==0 ) {
+           if ( wa->pos.pos == wb->pos.pos )
+               return 0;
+           return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+       }
+       return res;
+   }
+   return (wa->len > wb->len) ? 1 : -1;
+}
+
+/*
+ * Give a WORD a fresh two-slot position array: slot 0 is the count (1),
+ * slot 1 the given position.  This overwrites the pos union, so the caller
+ * passes the old scalar position by value.
+ */
+static void
+startPosList(WORD * w, int firstpos)
+{
+   w->alen=2;
+   w->pos.apos=(uint16*)palloc( sizeof(uint16)*w->alen );
+   w->pos.apos[0]=1;
+   w->pos.apos[1]=firstpos;
+}
+
+/*
+ * Sort the words and collapse repeated lexemes into one WORD each, turning
+ * the per-word position into an array (apos[0] = count, positions follow).
+ * The word strings of dropped duplicates are pfree'd.  A position is only
+ * appended while fewer than MAXNUMPOS-1 are stored and the last stored one
+ * is not MAXENTRYPOS-1.  Returns the number of distinct words.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *cur,
+              *kept;
+
+   if (l == 1) {
+       startPosList(a, LIMITPOS(a->pos.pos));
+       return l;
+   }
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   startPosList(a, LIMITPOS(a->pos.pos));
+
+   kept = a;
+   for (cur = a + 1; cur - a < l; cur++)
+   {
+       uint16     *plist;
+
+       if (!(cur->len == kept->len &&
+             strncmp(cur->word, kept->word, kept->len) == 0))
+       {
+           /* different lexeme: open the next output slot */
+           kept++;
+           kept->len = cur->len;
+           kept->word = cur->word;
+           startPosList(kept, LIMITPOS(cur->pos.pos));
+           continue;
+       }
+
+       /* repeated lexeme: drop its string, maybe remember its position */
+       pfree(cur->word);
+       plist = kept->pos.apos;
+       if ( !(plist[0] < MAXNUMPOS-1 && plist[ plist[0] ] != MAXENTRYPOS-1) )
+           continue;
+
+       if ( plist[0]+1 >= kept->alen ) {
+           kept->alen*=2;
+           kept->pos.apos=(uint16*)repalloc( kept->pos.apos, sizeof(uint16)*kept->alen );
+           plist = kept->pos.apos;
+       }
+       plist[ plist[0]+1 ] = LIMITPOS(cur->pos.pos);
+       plist[0]++;
+   }
+
+   return kept + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * De-duplicates prs->words with uniqueWORD(), then packs the words and their
+ * position lists into a newly palloc'd, zero-filled tsvector.  All positions
+ * get weight 0.  The word strings, the position arrays and prs->words itself
+ * are pfree'd on the way.
+ *
+ * Raises ERROR when a lexeme offset does not fit the 20-bit WordEntry.pos
+ * field; the largest storable offset is MAXSTRPOS-1, hence ">=".
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   /* size of the data area: aligned lexemes plus count + positions */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* count first, then one WordEntryPos per position */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text) - parse a text with the given configuration and
+ * build a tsvector from the words found.  A text yielding no words gives an
+ * empty tsvector (header only).
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *in = PG_GETARG_TEXT_P(1);
+   PRSTEXT     prs;
+   tsvector       *out = NULL;
+   TSCfgInfo *cfg=findcfg(PG_GETARG_INT32(0)); 
+
+   /* start with room for 32 words */
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+   PG_FREE_IF_COPY(in, 1);
+
+   if (prs.curwords == 0) {
+       /* nothing indexable: return a bare header */
+       pfree(prs.words);
+       out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       PG_RETURN_POINTER(out);
+   }
+
+   out = makevalue(&prs);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * to_tsvector(cfg_name, text) - resolve the configuration name to its id
+ * with name2id_cfg() and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfg=PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfg ) );
+   Datum       res;
+
+   res = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsvector(text) - delegate to to_tsvector() using the configuration id
+ * returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       res;
+
+   res = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is text.
+ * Returns the oid of the first matching candidate, or InvalidOid when there
+ * is none.  The candidate list is pfree'd node by node.
+ */
+static Oid
+findFunc(char *fname) {
+   List *names=makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(names, 1);
+   Oid found = InvalidOid;
+
+   freeList(names);
+
+   while (cand != NULL) {
+       FuncCandidateList next = cand->next;
+
+       /* keep only the first candidate taking text */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * tsearch2(tsvector_field, text_field1, ...): row-level BEFORE INSERT/UPDATE
+ * trigger that parses the listed columns with the current configuration and
+ * stores the resulting tsvector in tsvector_field of the returned tuple.
+ *
+ * A trigger argument that is not a column of the relation is looked up as a
+ * function name with findFunc(); once one is found, the values of the
+ * columns processed after it are passed through that function before being
+ * parsed (funcoid is not reset inside the loop).
+ *
+ * Raises ERROR when not fired as a row-level BEFORE INSERT/UPDATE trigger,
+ * when fewer than two arguments are given, when tsvector_field or an
+ * argument cannot be resolved, or when SPI_modifytuple() fails.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* first argument names the column that receives the tsvector */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * If detoasting made no copy, remember the function result in
+            * txt_toasted so that the pfree() test below leaves it alone.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when it is a separate detoasted copy */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words found: store an empty tsvector (header only) */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Descriptor of one lexeme, packed into 32 bits:
+ *   haspos - set when a position list follows the lexeme in the data area
+ *   len    - lexeme length in bytes
+ *   pos    - offset of the lexeme from the start of the data area (STRPTR)
+ * MAXSTRLEN and MAXSTRPOS are one past the largest value the len and pos
+ * fields can hold.
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme, packed into 16 bits: a 2-bit weight
+ * (0..3; the text form shows 3/2/1 as A/B/C) and a 14-bit position.
+ *   MAXENTRYPOS - one past the largest position the pos field can hold
+ *   MAXNUMPOS   - cap on the number of positions kept per lexeme
+ *   LIMITPOS(x) - clamps x to MAXENTRYPOS-1 (evaluates x more than once)
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * In-memory form of a tsvector value:
+ *   len  - total size in bytes, as computed by CALCDATASIZE
+ *   size - number of WordEntry items
+ *   data - the WordEntry array, followed by the lexemes and position lists
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Layout helpers for tsvector.  Macro arguments are parenthesised so that
+ * expression arguments (e.g. "a + b" or "cur - data") expand safely.
+ *
+ *   DATAHDRSIZE          - size of the len + size header
+ *   CALCDATASIZE(x, l)   - total bytes for x entries plus l bytes of data
+ *   ARRPTR(x)            - the WordEntry array of tsvector x
+ *   STRPTR(x)            - start of the data area (depends on x->size)
+ *   STRSIZE(x)           - size of the data area
+ *   _POSDATAPTR(x,e)     - address of entry e's uint16 position count
+ *   POSDATALEN(x,e)      - number of positions of e, 0 when e has none
+ *   POSDATAPTR(x,e)      - first WordEntryPos of e (just after the count)
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Entry as collected while parsing input: the packed descriptor plus a
+ * separately allocated position array.  pos is only meaningful when
+ * entry.haspos is set; slot 0 of the array is used as a uint16 count.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * Parser state for gettoken_tsvector():
+ *   prsbuf     - current read position in the input string
+ *   word       - buffer receiving the current lexeme
+ *   curpos     - write position inside word
+ *   len        - allocated size of word
+ *   state      - current state of the state machine
+ *   alen       - allocated slots in pos; 0 when the token has no positions
+ *   pos        - position list of the token (slot 0 holds the count)
+ *   oprisdelim - when true, operator characters end a word and position
+ *                info is not parsed
+ */
+typedef struct
+{
+   char       *prsbuf;
+   char       *word;
+   char       *curpos;
+   int4        len;
+   int4        state;
+   int4        alen;
+   WordEntryPos    *pos;
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+/* Reads the next token; returns 1 when one was read, 0 at end of input. */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include              /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector) - return a copy of the vector without position lists.
+ *
+ * The result keeps every lexeme in the same order; each entry has haspos
+ * cleared and its offset recomputed for the denser data area.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* data area needs only the (aligned) lexemes */
+   for(i=0;i<in->size;i++) 
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char") - return a copy of the vector in which every
+ * position carries the given weight.
+ *
+ * The weight letter is case-insensitive: a=3, b=2, c=1, d=0; anything else
+ * raises ERROR.  Entries without positions are left as they are.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* tolower() is only defined for unsigned char values (or EOF) */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;    
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Three-way comparison of two lexemes that may live in different vectors:
+ * ptra/ptrb are the data areas that a->pos and b->pos are offsets into.
+ * Shorter lexemes sort first; equal lengths are ordered by strncmp().
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of entry srcptr (in src) to the position list of
+ * entry destptr (in dest), shifting each position by maxpos and clamping it
+ * with LIMITPOS.  When destptr has no positions yet its count is reset to 0
+ * first; haspos is set once at least one position was added.
+ *
+ * Returns the number of positions appended.
+ *
+ * NOTE(review): the loop header was truncated in this copy of the file.  The
+ * condition below is reconstructed: "i < slen" follows from the body, the
+ * MAXNUMPOS / MAXENTRYPOS-1 stop conditions mirror the limits applied in
+ * uniquePos() - confirm against the upstream source.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+/*
+ * concat(tsvector, tsvector) - merge two vectors into one.
+ *
+ * Positions coming from the second vector are shifted by the largest
+ * position found in the first one (via add_pos).  The two entry arrays are
+ * walked in parallel using compareEntry(); lexemes present in both inputs
+ * produce a single entry carrying both position lists.
+ * NOTE(review): the merge relies on both inputs being ordered the way
+ * compareEntry() orders lexemes - confirm that all producers guarantee it.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* allocate for the case that no lexeme is shared; trimmed at the end */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* in1 positions are copied unchanged, count included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* in2 positions are shifted by maxpos; none added => no list */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one entry, both position lists */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* count slot already copied from in1; add only the new items */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* leftover entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* leftover entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Fix up the real entry count and total size.  STRPTR() depends on
+    * out->size, so when shared lexemes shrank the entry array the data
+    * area has to be moved down to its new start.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+--Be careful!
+--This script drops all indexes, triggers and columns that use the types
+--defined in tsearch2.sql.
+
+
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the token
+ * type number from deflex.h (1..LASTNUM).  Entry 0 is an empty
+ * placeholder because no token type is numbered 0.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token type number from
+ * deflex.h (1..LASTNUM), in the same order as lex_descr[].  Entry 0 is an
+ * empty placeholder because no token type is numbered 0.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Token type numbers returned by the default word parser.
+ *
+ * Remember !!!!  LASTNUM must stay equal to the highest token type number
+ * below, and lex_descr[] / tok_alias[] need one entry per number.
+ */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* per-token-type description and alias tables, indexed by the numbers above */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Interface to the flex-generated word scanner (wordparser/parser.l).
+ */
+
+/*
+ * Text of the token most recently returned by tsearch2_yylex().
+ * Declared extern here: the single definition (initialized to NULL)
+ * lives in parser.l, so the header must not define it again in every
+ * file that includes it.
+ */
+extern char *token;
+
+/*
+ * Length in bytes of the token most recently returned.
+ * NOTE(review): no other definition of tokenlen is visible in parser.l,
+ * so it is left as a tentative definition; ideally it would be defined
+ * once in parser.l and only declared extern here.
+ */
+int            tokenlen;
+
+/* return the type of the next token (0 at end of input), setting token/tokenlen */
+int            tsearch2_yylex(void);
+/* start scanning the given string of the given length */
+void       start_parse_str(char *, int);
+/* start scanning a file handle, reading at most the given number of bytes (0 = no limit) */
+void       start_parse_fh(FILE *, int);
+/* release the scanner buffer and saved token copy */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/* Avoid exit() on fatal scanner errors */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* postgres allocation function */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the current token */
+char *s     = NULL;  /* private copy used to return a WHOLE hyphenated word or URL */
+
+YY_BUFFER_STATE buf = NULL; /* scanner buffer being parsed; needed to parse from a string */
+
+int lrlimit = -1;  /* bytes still allowed to be read from a filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next read from a filehandle */
+
+/*
+ * Redefine flex's input macro so that reads from tsearch2_yyin can be
+ * limited in length.
+ *
+ * Interactive buffers are read one character at a time up to and including
+ * a newline.  Otherwise: lrlimit == 0 reports end of input; lrlimit > 0
+ * reads at most lrlimit bytes and decrements lrlimit by the amount
+ * requested; a negative lrlimit reads max_size bytes (no limit).
+ *
+ * Note: the fread() call below is indented as if it belonged to the inner
+ * "else", but that "else" has no braces and governs only the assignment to
+ * bytestoread, so fread() runs after either branch.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: drop the saved copy of a hyphenated word / URL
+ * (if any) and delete the current scanner buffer.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Begin scanning the first 'limit' bytes of 'str'.  Any parse still in
+ * progress is ended first, and the scanner is put in its INITIAL state.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL ) {
+       end_parse();
+   }
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Begin scanning from file handle 'fh'.  A non-zero 'limit' becomes the
+ * read limit (lrlimit) used by YY_INPUT; zero selects -1, i.e. unlimited.
+ * Any parse still in progress is ended first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL ) {
+       end_parse();
+   }
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Load the description of parser 'id' from pg_ts_parser into *prs.
+ *
+ * *prs is zeroed first.  The row is fetched through SPI using a plan that
+ * is prepared and saved on first use.  The start, nexttoken, end and
+ * headline function oids are resolved with fmgr_info_cxt() in
+ * TopMemoryContext; the lextype function oid is stored unresolved.
+ * Failures (SPI errors, no matching row) are reported via
+ * ts_error(ERROR, ...).
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else
+       /* Oid is unsigned: print it with %u so large oids are not shown negative */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/*
+ * Cache of parser descriptions loaded so far.
+ */
+typedef struct {
+   /* entry returned by the most recent lookup, checked first by findprs() */
+   WParserInfo *last_prs;
+   /* number of entries in use in list[] */
+   int     len;
+   /* number of entries allocated for list[] */
+   int     reallen;
+   /* entries, kept sorted by prs_id so that bsearch() can be used */
+   WParserInfo *list;
+   /* parser name -> parser oid map filled by name2id_prs() */
+   SNMap       name2id_map;
+} PrsList;
+
+/* the single cache instance, initially empty */
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Throw away the whole parser cache: the name->oid map, the array of
+ * parser descriptions, and all bookkeeping fields.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL ) {
+       free(PList.list);
+   }
+
+   memset(&PList, 0, sizeof(PrsList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is an
+ * unsigned type, so the difference wraps around and, once truncated to
+ * int, has the wrong sign whenever the two ids are more than INT_MAX
+ * apart, which would corrupt the sort order used by findprs().
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached description of parser 'id', loading it with
+ * init_prs() on first use.
+ *
+ * Lookup order: the most recently returned entry, then a binary search of
+ * the sorted cache, then a fresh load appended to the cache (which is
+ * grown by doubling, starting at 16 entries) followed by a re-sort.
+ * The returned pointer refers into the cache array and so may be
+ * invalidated by a later call that grows or re-sorts it.
+ */
+WParserInfo *
+findprs(Oid id) {
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       WParserInfo key;
+       key.prs_id=id;
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* last chance */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp )
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /*
+    * Do not publish the new slot through last_prs before it is filled:
+    * init_prs() zeroes the slot and reports failure via ts_error(ERROR, ...)
+    * (presumably without returning), which would otherwise leave last_prs
+    * pointing at a zeroed, unusable entry with prs_id 0 that a later
+    * findprs(0) would hand out.
+    */
+   PList.last_prs = NULL;
+   init_prs(id, &(PList.list[PList.len]));
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return findprs(id); /* qsort changed order!! */
+}
+
+/* saved SPI plan for the name -> oid query, prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * The name->oid map in PList is consulted first; on a miss the table is
+ * queried through SPI and the result is added to the map.  An unknown
+ * name or an SPI failure is reported via ts_error(ERROR, ...).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid     argtypes[1];
+   Datum   argvals[1];
+   bool    isnull;
+   int     rc;
+   Oid     prsid;
+
+   argtypes[0] = TEXTOID;
+   argvals[0] = PointerGetDatum(name);
+
+   prsid = findSNMap_t( &(PList.name2id_map), name );
+   if ( prsid != 0 )
+       return prsid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, argvals, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       prsid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   SPI_finish();
+   addSNMap_t( &(PList.name2id_map), name, prsid );
+   return prsid;
+}
+
+
+/******sql-level interface******/
+
+/* iteration state of the token_type* set-returning functions */
+typedef struct {
+   int     cur;        /* index of the next entry of list[] to return */
+   LexDescr    *list;  /* entries produced by the parser's lextype function */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type* functions: look up parser
+ * 'prsid', call its lextype function to get the list of token types, and
+ * prepare the "tokentype" tuple machinery.  Everything that must survive
+ * across calls is allocated in the multi-call memory context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo     *parser = findprs(prsid);
+   MemoryContext   savedctx;
+   TypeStorage     *state;
+   TupleDesc       desc;
+   Datum           lexlist;
+
+   savedctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   state->cur = 0;
+   lexlist = OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) );
+   state->list = (LexDescr*)DatumGetPointer( lexlist );
+   funcctx->user_fctx = (void*)state;
+
+   desc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(savedctx);
+}
+
+/*
+ * Per-call worker of the token_type* functions.
+ *
+ * Returns the next (id, alias, description) tuple as a Datum and frees the
+ * alias and description strings of that entry.  When the list is missing
+ * or its terminating entry (lexid == 0) is reached, the iteration state is
+ * freed and (Datum)0 is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *state = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char        *fields[3];
+   char        idbuf[16];
+   HeapTuple   tup;
+   Datum       res;
+
+   /* nothing (more) to return: release the state and signal the end */
+   if ( !state->list || !state->list[state->cur].lexid ) {
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   sprintf(idbuf, "%d", entry->lexid);
+   fields[0] = idbuf;
+   fields[1] = entry->alias;
+   fields[2] = entry->descr;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   res = TupleGetDatum(funcctx->slot, tup);
+
+   pfree(fields[1]);
+   pfree(fields[2]);
+   state->cur++;
+   return res;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: token types of the parser whose oid is
+ * argument 0, one tuple per call.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       ctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(ctx, PG_GETARG_OID(0) );
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = process_call(ctx);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(ctx);
+   }
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: token types of the parser whose name is
+ * argument 0, one tuple per call.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0);
+
+       ctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(ctx, name2id_prs( prsname ) );
+       PG_FREE_IF_COPY(prsname,0);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = process_call(ctx);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(ctx);
+   }
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: token types of the current parser, one tuple
+ * per call.  If no current parser has been chosen yet, the parser named
+ * "default" becomes the current one.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       ctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       setup_firstcall(ctx, current_parser_id );
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = process_call(ctx);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(ctx);
+   }
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+
+/*
+ * Make the parser whose oid is argument 0 the current parser.  The parser
+ * is looked up first, so the current parser is only changed if findprs()
+ * comes back.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid prsid = PG_GETARG_OID(0);
+
+   findprs(prsid);
+   current_parser_id = prsid;
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Make the parser whose name is argument 0 the current parser, by
+ * resolving the name to an oid and delegating to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+   text    *prsname = PG_GETARG_TEXT_P(0);
+   Oid     prsid = name2id_prs(prsname);
+
+   DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+
+   PG_FREE_IF_COPY(prsname, 0);
+   PG_RETURN_VOID();
+}
+
+/* one parsed token: its type number and a NUL-terminated copy of its text */
+typedef struct {
+   int type;
+   char    *lexem;
+} LexemEntry;
+
+/*
+ * Iteration state of the parse* set-returning functions.
+ * While the text is being parsed, cur counts the stored entries and len is
+ * the allocated size of list[]; afterwards len is the number of entries
+ * and cur the index of the next one to return.
+ */
+typedef struct {
+   int cur;
+   int len;
+   LexemEntry  *list;
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse* functions: run parser 'prsid' over
+ * 'txt' and store every token (type plus a NUL-terminated copy of its
+ * text) in a PrsStorage kept in the multi-call memory context, then
+ * prepare the "tokenout" tuple machinery.
+ *
+ * prsid is an Oid, matching what findprs() takes and what every caller
+ * passes (it was previously declared int, forcing a signed round trip).
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid);
+   char    *lex=NULL;
+   int     llen=0, type=0;
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the payload of txt (varlena header excluded) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* collect tokens until the parser returns type 0 */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* grow the list by doubling when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* switch from "filling" to "reading": len = number of tokens, cur = next to return */
+   st->len=st->cur;
+   st->cur=0;
+
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker of the parse* functions.
+ *
+ * Returns the next (type, text) tuple as a Datum and frees the stored
+ * token text.  Once all tokens have been returned, the iteration state is
+ * freed and (Datum)0 is returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *state = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char        *fields[2];
+   char        typebuf[16];
+   HeapTuple   tup;
+   Datum       res;
+
+   /* all tokens returned: release the state and signal the end */
+   if ( state->cur >= state->len ) {
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   sprintf(typebuf, "%d", entry->type);
+   fields[0] = typebuf;
+   fields[1] = entry->lexem;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   res = TupleGetDatum(funcctx->slot, tup);
+
+   pfree(fields[1]);
+   state->cur++;
+   return res;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokens of the text in argument 1 as produced by
+ * the parser whose oid is argument 0, one tuple per call.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(1);
+
+       ctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(ctx, PG_GETARG_OID(0), input );
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(ctx);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(ctx);
+   }
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokens of the text in argument 1 as produced by
+ * the parser whose name is argument 0, one tuple per call.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0);
+       text *input = PG_GETARG_TEXT_P(1);
+
+       ctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(ctx, name2id_prs( prsname ), input );
+       PG_FREE_IF_COPY(prsname,0);
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(ctx);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(ctx);
+   }
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokens of the text in argument 0 as produced by
+ * the current parser, one tuple per call.  If no current parser has been
+ * chosen yet, the parser named "default" becomes the current one.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(0);
+
+       ctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       prs_setup_firstcall(ctx, current_parser_id, input );
+       PG_FREE_IF_COPY(input,0);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(ctx);
+
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(ctx);
+   }
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+
+/*
+ * Build a headline for a document.
+ *
+ * Arguments: 0 - oid of the configuration, 1 - document text,
+ * 2 - query, 3 - optional options text (may be absent or a NULL pointer).
+ * The document is parsed with hlparsetext(), the headline function of the
+ * configuration's parser is called on the parsed words, and genhl()
+ * produces the resulting text.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /*
+    * prs was zero-filled above and startsel/stopsel are not set in this
+    * function (presumably the parser's headline function sets them), so
+    * guard against passing a NULL pointer to pfree().
+    */
+   if ( prs.startsel )
+       pfree(prs.startsel);
+   if ( prs.stopsel )
+       pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Same as headline(), but the configuration is given by name (argument 0)
+ * instead of by oid.  A missing fourth argument is passed on as a NULL
+ * pointer.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text    *cfgname = PG_GETARG_TEXT_P(0);
+   Datum   options;
+   Datum   result;
+
+   if ( PG_NARGS()>3 )
+       options = PG_GETARG_DATUM(3);
+   else
+       options = PointerGetDatum(NULL);
+
+   result = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfgname ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       options
+   );
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+
+/*
+ * Same as headline(), but using the configuration returned by
+ * get_currcfg().  Arguments: 0 - document text, 1 - query, 2 - optional
+ * options text; a missing third argument is passed on as a NULL pointer.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum   options;
+   Datum   result;
+
+   if ( PG_NARGS()>2 )
+       options = PG_GETARG_DATUM(2);
+   else
+       options = PointerGetDatum(NULL);
+
+   result = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       options
+   );
+
+   PG_RETURN_DATUM(result);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/*
+ * Cached description of one parser (one row of pg_ts_parser).
+ * The FmgrInfo fields hold the resolved start, get-lexeme, end and
+ * headline functions; lextype is kept as a function oid; prs is the
+ * handle returned by the parser's start function.
+ */
+typedef struct {
+   Oid prs_id;
+   FmgrInfo start_info;
+   FmgrInfo getlexeme_info;
+   FmgrInfo end_info;
+   FmgrInfo headline_info;
+   Oid lextype;
+   void *prs;
+} WParserInfo;
+
+/* load the description of parser 'id' from pg_ts_parser into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached description of parser 'id', loading it if needed */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* drop all cached parser information */
+void   reset_prs(void);
+
+
+/*
+ * Description of one token type as returned by a parser's lextype
+ * function.  A list of these is terminated by an entry with lexid == 0.
+ */
+typedef struct {
+   int lexid;
+   char    *alias;
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+/*
+ * prsd_lextype: build the table of lexeme types of the default parser.
+ * Returns a palloc'd array of LASTNUM+1 LexDescr entries: one entry for each
+ * type id 1..LASTNUM (alias and description are pstrdup'ed from tok_alias[]
+ * and lex_descr[]), followed by a terminating entry whose lexid is 0.
+ */
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *table=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   LexDescr *slot=table;
+   int id=1;
+
+   while ( id<=LASTNUM ) {
+       slot->lexid = id;
+       slot->alias = pstrdup(tok_alias[id]);
+       slot->descr = pstrdup(lex_descr[id]);
+       slot++;
+       id++;
+   }
+
+   /* terminator: only lexid is initialized */
+   slot->lexid=0;
+
+   PG_RETURN_POINTER(table);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   start_parse_str( (char*)PG_GETARG_POINTER(0), PG_GETARG_INT32(1) ); /* arg 0: character buffer, arg 1: int32 handed over with it (presumably the buffer length -- confirm against start_parse_str) */
+   PG_RETURN_POINTER(NULL); /* no per-call parser state object is handed back */
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   char **t=(char**)PG_GETARG_POINTER(1); /* out: receives the token pointer */
+   int *tlen=(int*)PG_GETARG_POINTER(2); /* out: receives the token length */
+   int  type=tsearch2_yylex(); /* run the lexer once; its return value is the lexeme type */
+
+   *t = token; /* token and tokenlen are not declared in this function; presumably set by the lexer -- confirm */
+   *tlen = tokenlen;
+   PG_RETURN_INT32(type);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse(); /* argument 0 is not read; the call takes no state argument */
+   PG_RETURN_VOID();
+}
+
+#define LEAVETOKEN(x)  ( (x)==12 ) /* the numbers in these macros are lexeme type ids; not referenced by the functions below */
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 ) /* same id set as HLIDIGNORE minus 13; not referenced below */
+#define ENDPUNCTOKEN(x)    ( (x)==12 ) /* same test as LEAVETOKEN; not referenced below */
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 ) /* only used as part of NOENDTOKEN */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 ) /* prsd_headline sets the "replace" flag on words of these types */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) ) /* types that prsd_headline does not count as words */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) ) /* types on which prsd_headline avoids ending a headline */
+
+typedef struct {
+   HLWORD  *words; /* first word of the window scanned by checkcondition_HL */
+   int len; /* number of words in that window */
+} hlCheck;
+
+/*
+ * Callback handed to TS_execute by hlCover: reports whether query item
+ * "val" is attached to at least one word of the hlCheck window passed in
+ * "checkval".
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window=(hlCheck*)checkval;
+   HLWORD  *word=window->words;
+   int left=window->len;
+
+   while ( left-- > 0 ) {
+       if ( word->item==val )
+           return true;
+       word++;
+   }
+   return false;
+}
+
+
+/*
+ * hlCover: find the next "cover" of the query inside the headline words.
+ *
+ * On entry *p is the index in prs->words where the search starts.  On
+ * success *p and *q are set to the first and last word index of a span for
+ * which TS_execute accepts the query, and true is returned.  When false is
+ * returned *p and *q carry no meaning.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* pass 1: for every VAL item take its first occurrence at or after pos;
+    * *q becomes the largest of those indexes */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   /* NOTE(review): index 0 doubles as "nothing found", so a match located
+    * only at word 0 is reported as no cover */
+   if ( *q==0 )
+       return false;
+
+   /* pass 2: walk back from *q; *p becomes the smallest index among the
+    * last occurrences of each VAL item inside pos..*q */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* span does not satisfy the query: retry one word further */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+/*
+ * prsd_headline: choose which words of the parsed text form the headline.
+ *
+ * Arguments: 0 - HLPRSTEXT* with the parsed words, 1 - options text or NULL,
+ * 2 - the query.  Recognized option keys (case-insensitive): MaxWords,
+ * MinWords, ShortWord, StartSel, StopSel.  The function walks over all
+ * covers reported by hlCover(), keeps the best one, marks the chosen words
+ * (in/selected/skip/replace) and fills in the start/stop highlight strings.
+ * Returns the same HLPRSTEXT pointer.
+ */
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults; may be overridden from opt, together with start and end tag */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       /* the map is terminated by an entry with a NULL key */
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* a better cover was already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /* accept this cover when it is the first one, when it holds more
+        * query words and ends on a good token, or when it ends on a good
+        * token while the current best does not */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: take the first min_words words of the text */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* default highlight markers when the options did not supply them */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+       ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+       if ( ws[ i ] > 1.0 ) 
+           elog(ERROR,"Weight out of range");
+   } 
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   res=calc_rank(ws, txt, query, method); 
+       
+   PG_FREE_IF_COPY(win, 0);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+   PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank_def: rank a tsvector (arg 0) against a query (arg 1) with the
+ * file-level "weights" array.  An optional 3rd argument selects the
+ * normalization method; otherwise DEF_NORM_METHOD is used.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   int method;
+   float score;
+
+   if ( PG_NARGS() == 3 )
+       method=PG_GETARG_INT32(2);
+   else
+       method=DEF_NORM_METHOD;
+
+   score=calc_rank(weights, txt, query, method); 
+
+   PG_FREE_IF_COPY(txt, 0);
+   PG_FREE_IF_COPY(query, 1);
+   PG_RETURN_FLOAT4(score);
+}
+
+
+typedef struct {
+   ITEM    *item; /* query item this occurrence belongs to (set by get_docrep) */
+   int32   pos; /* position value copied from the tsvector entry; sort key of compareDocR */
+} DocRepresentation;
+
+/*
+ * qsort comparator: orders DocRepresentation entries by ascending pos.
+ * Entries with equal pos compare as equal (0), as qsort requires of a
+ * consistent comparison function.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+   if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
+       return 0;
+   return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+}
+
+
+typedef struct {
+   DocRepresentation *doc; /* first entry of the window scanned by checkcondition_DR */
+   int len; /* number of entries in that window */
+}  ChkDocR;
+
+/*
+ * Callback handed to TS_execute by Cover: reports whether query item "val"
+ * occurs among the entries of the ChkDocR window passed in "checkval".
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+   ChkDocR *window = (ChkDocR*)checkval;
+   int i;
+
+   for(i=0; i<window->len; i++) {
+       if ( window->doc[i].item == val )
+           return true;
+   }
+
+   return false;
+}
+
+
+/*
+ * Cover: find the next cover of the query in the position-sorted document
+ * representation doc[0..len-1].
+ *
+ * *pos is the index in doc[] where the search starts and is advanced past
+ * the start of the cover that was examined.  *q must hold the upper bound
+ * of the previously returned cover (0 on the first call).  On success *p
+ * and *q receive the lowest and highest word position of the cover and
+ * true is returned.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+   int i;
+   DocRepresentation   *ptr,*f=(DocRepresentation*)0xffffffff;
+   ITEM    *item=GETQUERY(query);
+   int lastpos=*pos;
+   int oldq=*q;
+
+   *p=0x7fffffff;
+   *q=0;
+
+   /* pass 1: first occurrence of each VAL item at or after *pos; *q is the
+    * largest position among them and lastpos its index in doc[] */
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + *pos;
+
+       while(ptr-doc<len) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               } 
+               break;
+           } 
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   } 
+
+   /* pass 2: walk back from lastpos; *p is the smallest position among the
+    * last occurrences of each VAL item and f the matching doc[] entry */
+   item=GETQUERY(query);
+   for(i=0; i<query->size; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+ 
+   if ( *p<=*q ) {
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { 
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ 
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q); 
+   }
+ 
+   return false;
+}
+
+/*
+ * get_docrep: build the "document representation" of txt for query: one
+ * DocRepresentation entry per (query VAL item, position) pair found in the
+ * tsvector, sorted by position.
+ *
+ * Returns a palloc'd array and stores its length in *doclen, or returns
+ * NULL (with *doclen = 0) when no query item occurs in txt.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   /* POSNULL is the substitute position list for entries without positions;
+    * its first element is used as the element count */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until the new positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+   
+   if ( cur>0 ) {
+       if ( cur>1 ) 
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+   
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * rank_cd: rank computed from the covers of the query in the document.
+ *
+ * Arguments: 0 - K (values <= 0 are replaced by 4), 1 - tsvector,
+ * 2 - query, optional 3 - normalization method (0: none, 1: divide by the
+ * log of cnt_length(txt), 2: divide by cnt_length(txt)).  Every cover adds
+ * 1.0, or K/width when the cover is wider than K positions.  Returns 0.0
+ * when the document has no representation for the query.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method = ( PG_NARGS() == 4 ) ? PG_GETARG_INT32(3) : DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   int p=0,q=0,len,cur=0;
+
+   doc = get_docrep(txt, query, &len);
+   if ( doc == NULL ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   if ( K<=0 )
+       K=4;
+
+   while( Cover(doc, len, query, &cur, &p, &q) ) {
+       int width = q-p+1;
+
+       res += ( width > K ) ? ((float)K)/((float)width) : 1.0;
+   }
+
+   if ( method==1 )
+       res /= log((float)cnt_length(txt));
+   else if ( method==2 )
+       res /= (float)cnt_length(txt);
+   else if ( method!=0 )
+       elog(ERROR,"Unknown normalization method: %d",method);
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd_def: rank_cd() with K passed as -1.  Arguments 0 and 1 are
+ * forwarded; the normalization method is argument 2 when three arguments
+ * were given, DEF_NORM_METHOD otherwise.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum   method;
+
+   if ( PG_NARGS() == 3 )
+       method = PG_GETARG_DATUM(2);
+   else
+       method = Int32GetDatum(DEF_NORM_METHOD);
+
+   PG_RETURN_DATUM( DirectFunctionCall4(   
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   )); 
+}
+
+/**************debug*************/
+
+typedef struct {
+   char    *w; /* points at the word text inside the tsvector (not a copy, not NUL-terminated by get_covers) */
+   int2    len; /* length of the word text */
+   int2    pos; /* word position; sort key of compareDocWord */
+   int2    start; /* number of the cover that starts at this word, 0 if none */
+   int2    finish; /* number of the cover that ends at this word, 0 if none */
+} DocWord;
+
+/*
+ * qsort comparator: orders DocWord entries by ascending pos.  Entries with
+ * equal pos compare as equal (0), as qsort requires of a consistent
+ * comparison function.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
+       return 0;
+   return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers (debug aid): returns the words of the tsvector (arg 0) in
+ * position order as text, with every cover of the query (arg 1) marked as
+ * "{N " before its first word and "}N " after its last word, N being the
+ * 1-based cover number.  Raises an error when an entry of the tsvector has
+ * no position information.  Returns an empty text when the document has no
+ * representation for the query.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences of the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the output size needed
+    * for the words and their separating spaces */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark the first and last word of every cover; the index is tested
+    * before the entry is read so the scan never looks past dw[dlen-1] */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+typedef struct NODE
+{
+   struct NODE *left; /* second operand; maketree leaves it NULL for values and for '!' */
+   struct NODE *right; /* first operand (the item directly following the operator); NULL for values */
+   ITEM       *valnode; /* the query item this node stands for (points into the caller's array) */
+}  NODE;
+
+/*
+ * make query tree from plain view of query
+ *
+ * "in" points at an item of the flat query array.  For an operator (OPR)
+ * the right child is built from the item that directly follows it and,
+ * unless the operator is '!', the left child from the item at offset
+ * in->left.  Nodes are palloc'd; valnode points into the caller's array.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   node->valnode = in;
+   node->right = node->left = NULL;
+   if (in->type == OPR)
+   {
+       node->right = maketree(in + 1);
+       if (in->val != (int4) '!')
+           node->left = maketree(in + in->left);
+   }
+   return node;
+}
+
+typedef struct
+{
+   ITEM       *ptr; /* output array, grown by plainnode with repalloc */
+   int4        len; /* allocated capacity of ptr, in items */
+   int4        cur; /* number of items written so far */
+}  PLAINTREE;
+
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   if (state->cur == state->len) /* output array full: double it */
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+   memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM)); /* copy this node's item into the next free slot */
+   if (node->valnode->type == VAL)
+       state->cur++;
+   else if (node->valnode->val == (int4) '!')
+   {
+       state->ptr[state->cur].left = 1; /* '!' has a single operand, written right after it */
+       state->cur++;
+       plainnode(state, node->right);
+   }
+   else
+   {
+       int4        cur = state->cur; /* remember the operator's slot; its left offset is known only after the right subtree is written */
+
+       state->cur++;
+       plainnode(state, node->right);
+       state->ptr[cur].left = state->cur - cur; /* offset from the operator to the start of its left subtree */
+       plainnode(state, node->left);
+   }
+   pfree(node); /* the tree node is consumed: each node is freed after being emitted */
+}
+
+/*
+ * make plain view of tree from 'normal' view of tree
+ *
+ * Flattens the NODE tree "root" into a palloc'd ITEM array and stores the
+ * number of items in *len.  The tree nodes are freed by plainnode().  When
+ * root is NULL, or its item is neither VAL nor OPR, NULL is returned and
+ * *len is set to 0.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   pl;
+
+   pl.cur = 0;
+   pl.len = 16; /* initial capacity; plainnode doubles it on demand */
+   if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
+       plainnode(&pl, root);
+   }
+   else
+       pl.ptr = NULL;
+   *len = pl.cur;
+   return pl.ptr;
+}
+
+/*
+ * Recursively release a NODE tree.  A NULL pointer is accepted and ignored.
+ * The ITEMs referenced through valnode are not touched.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node != NULL)
+   {
+       NODE       *lchild = node->left;
+       NODE       *rchild = node->right;
+
+       freetree(lchild);
+       freetree(rchild);
+       pfree(node);
+   }
+}
+
+/*
+ * Remove '!' operators from the tree.
+ * The result is useful for debugging and is the view of the query used
+ * for index search, where operator ! is treated as always TRUE.
+ *
+ * Returns the cleaned subtree, or NULL when nothing is left of it:
+ *  - a value node is returned unchanged;
+ *  - a '!' node is freed together with its operand and yields NULL;
+ *  - a '|' node yields NULL (and is freed) when either operand yields NULL;
+ *  - any other operator drops operands that yield NULL: with both gone it
+ *    yields NULL, with one gone the surviving operand replaces the node.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node); /* frees whichever operand is still attached */
+           return NULL;
+       }
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           res = node->right; /* promote the surviving operand */
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left; /* promote the surviving operand */
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Build a tree from the flat query "ptr", strip it with
+ * clean_NOT_intree() and flatten the result again.  Returns the new item
+ * array (NULL when nothing is left) and stores its length in *len.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree = maketree(ptr);
+   NODE       *cleaned = clean_NOT_intree(tree);
+
+   return plaintree(cleaned, len);
+}
+
+#define V_UNKNOWN  0 /* subtree still contains real values; its truth is not known */
+#define V_TRUE     1 /* subtree was removed and counts as always true */
+#define V_FALSE        2 /* subtree was removed and counts as always false */
+
+/*
+ * Clean query tree from values which is always in
+ * text (stopword)
+ *
+ * Returns the cleaned subtree, or NULL when the whole subtree collapsed to
+ * a constant; in that case *result is set to V_TRUE or V_FALSE.  *result is
+ * left untouched when a subtree is returned.
+ *  - VAL node: returned unchanged;
+ *  - VALTRUE node: freed, *result = V_TRUE;
+ *  - '!': if the operand collapsed, the node collapses to the inverse;
+ *  - '|': V_TRUE if either operand is V_TRUE, V_FALSE if both are V_FALSE,
+ *    otherwise a V_FALSE operand is dropped and the other one promoted;
+ *  - other operator: V_FALSE if either operand is V_FALSE, V_TRUE if both
+ *    are V_TRUE, otherwise a V_TRUE operand is dropped and the other one
+ *    promoted.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *root = maketree(ptr); /* tree form of the flat query */
+   char        result = V_UNKNOWN;
+   NODE       *resroot;
+
+   resroot = clean_fakeval_intree(root, &result);
+   if (result != V_UNKNOWN) /* the whole query collapsed to a constant: nothing left to search for */
+   {
+       elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+       *len = 0;
+       return NULL;
+   }
+
+   return plaintree(resroot, len); /* flatten the cleaned tree; also frees its nodes */
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/*
+ * qsort/bsearch comparator: orders SNMapEntry elements by key (strcmp).
+ */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   const char *lkey = ((SNMapEntry*)a)->key;
+   const char *rkey = ((SNMapEntry*)b)->key;
+
+   return strcmp( lkey, rkey );
+}
+
+/*
+ * addSNMap: append the pair (key, value) to the map and keep the list
+ * sorted by key.  The key is copied with strdup(); the list itself lives in
+ * realloc()'ed memory and starts at 16 entries, doubling when full.
+ * Raises ERROR "No memory" when an allocation fails.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if (map->len>=map->reallen) {
+       int newlen = (map->reallen) ? 2*map->reallen : 16;
+       SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newlen);
+
+       if ( grown == NULL )
+           elog(ERROR, "No memory");
+       map->reallen=newlen;
+       map->list=grown;
+   }
+
+   slot = &( map->list[ map->len ] );
+   slot->key = strdup(key);
+   if ( slot->key == NULL )
+       elog(ERROR, "No memory");
+   slot->value=value;
+   map->len++;
+
+   /* re-sort so that findSNMap can use bsearch */
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *k=text2char( key ); /* temporary C-string form of the text key */
+   addSNMap(map, k, value); /* addSNMap stores its own strdup'ed copy */
+   pfree(k); /* so the temporary can be released here */
+}
+
+/*
+ * findSNMap: look up key in the map with a binary search.  Returns the
+ * stored value, or 0 when the map is empty or the key is absent.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if ( !map->list || map->len==0 )
+       return 0;   
+
+   probe.key = key;
+   probe.value = 0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( hit == NULL )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * findSNMap_t: findSNMap() for a text key.  The key is converted with
+ * text2char(), looked up, and the temporary string is freed.  Returns the
+ * stored value or 0 when the key is absent.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* same type as findSNMap's result: no signed/unsigned round trip */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * freeSNMap: release every key and the entry list of the map, then zero
+ * the whole SNMap structure.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *cursor;
+       SNMapEntry *stop = map->list + map->len;
+
+       for( cursor=map->list; cursor<stop; cursor++ ) {
+           if ( cursor->key )
+               free(cursor->key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+typedef struct {
+   char    *key; /* strdup'ed copy of the key (see addSNMap); released by freeSNMap */
+   Oid value; /* value stored for the key; findSNMap returns 0 for a missing key */
+} SNMapEntry;
+
+typedef struct {
+   int len; /* number of entries in use */
+   int reallen; /* allocated capacity of list, in entries */
+   SNMapEntry  *list; /* realloc'ed array, re-sorted by key after every addSNMap */
+} SNMap;
+
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * SN_create_env: allocate a stemmer environment with S_size string slots,
+ * I_size integers and B_size symbol flags (each array is skipped when its
+ * size is 0).  Returns NULL when a calloc() call fails; whatever had been
+ * allocated up to that point is released first.  The results of create_s()
+ * are used as returned.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    /* the *_size fields are set only after their array was allocated, so
+       SN_close_env releases exactly what exists */
+    SN_close_env(z);
+    return NULL;
+}
+
+extern void SN_close_env(struct SN_env * z)
+{
+    if (z->S_size) /* string slots exist only when S_size is non-zero */
+    {
+        {   int i;
+            for (i = 0; i < z->S_size; i++) lose_s(z->S[i]); /* release every string slot */
+        }
+        free(z->S);
+    }
+    if (z->I_size) free(z->I);
+    if (z->B_size) free(z->B);
+    if (z->p) lose_s(z->p); /* the current string */
+    free(z); /* finally the environment itself; z must not be used afterwards */
+}
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s); /* replace the range 0..z->l of the current string with the size symbols at s */
+    z->c = 0; /* reset the cursor field to the start */
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+struct SN_env {
+    symbol * p;
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;
+    symbol * * S;
+    int * I;
+    symbol * B;
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/* Among table a_0: the single prefix "gener", looked up with find_among()
+   in r_mark_regions. Entries follow struct among:
+   { s_size, s, substring_i, result, function }. */
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+/* Among table a_1: suffixes examined by r_Step_1a through find_among_b().
+   The result field (fourth member) selects the case of the switch in
+   r_Step_1a; -1 matches no case there, so "ss" and "us" are left alone. */
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+/* Among table a_2: word endings tested by r_Step_1b (its line-70 lookup)
+   after an "ed"/"ing"-type suffix has been deleted. Result 1 (at, bl, iz)
+   leads to an 'e' being inserted, result 2 (doubled letters) to the last
+   character being deleted, and result 3 belongs to entry 0, the empty
+   string (s_size 0, null s). */
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+/* Among table a_3: suffixes examined first by r_Step_1b. Result 1
+   (eed, eedly) and result 2 (ed, ing, edly, ingly) select the two outer
+   cases of the switch in r_Step_1b. */
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+/* Among table a_4: suffixes examined by r_Step_2 through find_among_b().
+   The result field (1..16) selects the replacement or deletion performed
+   by the switch in r_Step_2. */
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+/* Among table a_5: suffixes examined by r_Step_3 through find_among_b().
+   The result field (1..6) selects the case of the switch in r_Step_3. */
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+/* Among table a_6: suffixes examined by r_Step_4 through find_among_b().
+   Every entry has result 1 (plain deletion) except "ion", whose result 2
+   makes r_Step_4 additionally require a preceding 's' or 't'. */
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+/* Among table a_7: the final letters 'e' (result 1) and 'l' (result 2)
+   examined by r_Step_5 through find_among_b(). */
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+/* Among table a_8: whole words matched by r_exception2 (backward lookup
+   followed by an at-limit test). When one matches, english_stem skips
+   Step_1b through Step_5. All results are -1; only the fact of a match is
+   used. */
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+/* Among table a_9: whole words matched by r_exception1 (forward lookup
+   followed by an at-limit test). Results 1..11 select the fixed
+   replacement applied by r_exception1; entries with result -1 are accepted
+   unchanged. */
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+/* Character groupings passed to in_grouping()/out_grouping() and their _b
+   variants together with a (min, max) character range.
+   NOTE(review): each array looks like a bitmap with bit 0 of byte 0 standing
+   for the min character; the letter lists below assume that layout -
+   confirm against in_grouping in utilities.c. */
+
+/* g_v, used with range 97..121 ('a'..'y'): presumably a e i o u y. */
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+/* g_v_WXY, used with range 89..121 ('Y'..'y'): presumably Y a e i o u w x y. */
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+/* g_valid_LI, used with range 99..116 ('c'..'t'): presumably c d e g h k m n r t. */
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+/* Literal strings used by the routines below as arguments to eq_s(),
+   eq_s_b(), slice_from_s() and insert_s(). They carry no terminator; each
+   call site passes the length explicitly. */
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/* prelude: rewrite selected 'y' characters as 'Y' and record the fact.
+
+   Clears the Y_found flag (z->B[0]), then
+     - line 25: if the text at the cursor is 'y' and the character after it
+       is in grouping g_v, replaces that 'y' with 'Y';
+     - line 26: repeatedly scans forward for a g_v character followed by
+       'y' and replaces that 'y' with 'Y'.
+   Y_found is set to 1 whenever a replacement is made. The cursor is put
+   back where it started and the routine always returns 1.
+
+   (The label lab1 emitted by the generator after the repeat loop was never
+   the target of a goto and has been dropped to silence unused-label
+   warnings.) */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                /* no match here: step one character forward, or leave the
+                   repeat loop once the end of the text is reached */
+                z->c = c;
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+        z->c = c;
+    }
+    return 1;
+}
+
+/* mark_regions: compute the marks p1 (z->I[0]) and p2 (z->I[1]).
+
+   Both marks start at z->l. p1 is set to the cursor position reached
+   either after the a_0 prefix ("gener") matches, or after going past a g_v
+   character and then past a non-g_v character. p2 is set after going past
+   one more such pair. If the text runs out during any of these scans the
+   remaining marks keep the value z->l. The cursor is restored and the
+   routine always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+/* shortv: backward test used by Step_1b and Step_5.
+
+   Returns 1 when, reading backward from the cursor, either
+     (a) a character outside g_v_WXY, then one inside g_v, then one outside
+         g_v are found; or
+     (b) a character outside g_v, then one inside g_v are found and the
+         cursor is then at the backward limit z->lb.
+   Returns 0 otherwise. The cursor is left wherever the tests moved it;
+   callers restore it themselves. */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m; /* first alternative failed: rewind and try the second */
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* R1: returns 1 when the cursor is at or beyond mark p1 (z->I[0]),
+   0 otherwise. */
+static int r_R1(struct SN_env * z) {
+    return (z->c >= z->I[0]) ? 1 : 0;
+}
+
+/* R2: returns 1 when the cursor is at or beyond mark p2 (z->I[1]),
+   0 otherwise. */
+static int r_R2(struct SN_env * z) {
+    return (z->c >= z->I[1]) ? 1 : 0;
+}
+
+/* Step_1a: handle the suffixes listed in a_1.
+
+   Depending on the among result:
+     1 (sses)     - replace the suffix with "ss";
+     2 (ied, ies) - replace with "ie" when exactly one character precedes
+                    the suffix, otherwise with "i";
+     3 (s)        - step back one character, require a g_v character
+                    somewhere before that, then delete the 's';
+    -1 (ss, us)   - leave the word unchanged.
+   Returns 0 when no suffix matches or a condition fails, 1 otherwise. */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+/* Step_1b: handle the suffixes listed in a_3.
+
+   Result 1 (eed, eedly): if the suffix lies in R1, replace it with "ee".
+   Result 2 (ed, edly, ing, ingly): require a g_v character somewhere
+   before the suffix, delete the suffix, then look the new ending up in a_2:
+     1 (at, bl, iz)     - insert 'e' at the cursor;
+     2 (doubled letter) - delete the last character;
+     3 (empty match)    - if the cursor is exactly at p1 and shortv holds,
+                          insert 'e'.
+   Returns 0 when nothing matches or a condition fails (any deletion already
+   made stays in place), 1 otherwise. */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/* Step_1c: replace a final 'y' or 'Y' with 'i' when the character before
+   it is outside g_v and that character is not the first one of the word
+   (i.e. the cursor has not reached the backward limit after the tests).
+   Returns 1 when the replacement was made, 0 otherwise. */
+static int r_Step_1c(struct SN_env * z) {
+    int saved;
+
+    z->ket = z->c; /* [, line 83 */
+    saved = z->l - z->c; /* or, line 83 */
+    if (!eq_s_b(z, 1, s_10)) {
+        /* not 'y': rewind and try 'Y' instead */
+        z->c = z->l - saved;
+        if (!eq_s_b(z, 1, s_11)) return 0;
+    }
+    z->bra = z->c; /* ], line 83 */
+
+    if (!out_grouping_b(z, g_v, 97, 121)) return 0;
+
+    /* not atlimit, line 84: fail when the backward limit has been reached */
+    if (z->c <= z->lb) return 0;
+
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/* Step_2: look up the longest suffix from a_4 and, provided it lies in R1,
+   rewrite it according to the among result:
+     1..12, 14, 15 - replace the suffix with the fixed string shown in the
+                     corresponding case;
+     13 (ogi)      - replace with "og", but only when preceded by 'l';
+     16 (li)       - delete, but only when preceded by a g_valid_LI character.
+   Returns 0 when no suffix matches, the suffix is not in R1, or a
+   side-condition fails; 1 otherwise. */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+/* Step_3: look up the longest suffix from a_5 and, provided it lies in R1,
+   act on the among result: 1..4 replace the suffix with a fixed string,
+   5 deletes it, 6 deletes it only when it also lies in R2.
+   Returns 0 when no suffix matches or a region test fails, 1 otherwise. */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 116 */
+
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+
+    if (among_var == 1) {
+        slice_from_s(z, 4, s_29); /* <-, line 117 */
+    } else if (among_var == 2) {
+        slice_from_s(z, 3, s_30); /* <-, line 118 */
+    } else if (among_var == 3) {
+        slice_from_s(z, 2, s_31); /* <-, line 119 */
+    } else if (among_var == 4) {
+        slice_from_s(z, 2, s_32); /* <-, line 121 */
+    } else if (among_var == 5) {
+        slice_del(z); /* delete, line 123 */
+    } else if (among_var == 6) {
+        if (!r_R2(z)) return 0; /* call R2, line 125 */
+        slice_del(z); /* delete, line 125 */
+    }
+    return 1;
+}
+
+/* Step_4: look up the longest suffix from a_6 and, provided it lies in R2,
+   delete it. For result 2 ("ion") the deletion additionally requires the
+   suffix to be preceded by 's' or 't'.
+   Returns 0 when no suffix matches or a condition fails, 1 otherwise. */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+/* Step_5: handle a final 'e' or 'l' (table a_7).
+
+   Result 1 ('e'): delete it when it lies in R2, or when it lies in R1 and
+   the shortv test fails at that position.
+   Result 2 ('l'): delete it when it lies in R2 and is preceded by 'l'.
+   Returns 0 when nothing matches or the conditions fail, 1 otherwise. */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/* exception2: backward test that succeeds (returns 1) when one of the
+   words in a_8 matches at the cursor and the match reaches the backward
+   limit z->lb; returns 0 otherwise. */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (find_among_b(z, a_8, 8) == 0) { /* substring, line 147 */
+        return 0;
+    }
+    z->bra = z->c; /* ], line 147 */
+
+    /* atlimit, line 147 */
+    return (z->c > z->lb) ? 0 : 1;
+}
+
+/* exception1: forward test for the special-case words in a_9.
+
+   Succeeds when one of those words matches at the cursor and the match
+   extends to the limit z->l. For among results 1..11 the matched slice is
+   replaced by a fixed string; for any other non-zero result the word is
+   left as it is. Returns 1 on success, 0 otherwise. */
+static int r_exception1(struct SN_env * z) {
+    /* Replacement text and its length, indexed by among result;
+       slot 0 is unused. */
+    static symbol * const fix_text[12] = {
+        0, s_36, s_37, s_38, s_39, s_40, s_41, s_42, s_43, s_44, s_45, s_46
+    };
+    static const int fix_size[12] = {
+        0, 3, 3, 3, 3, 3, 3, 5, 4, 5, 4, 5
+    };
+    int among_var;
+
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (among_var == 0) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+
+    if (among_var >= 1 && among_var <= 11) {
+        slice_from_s(z, fix_size[among_var], fix_text[among_var]); /* <-, lines 163-176 */
+    }
+    return 1;
+}
+
+/* postlude: undo the marking done by prelude.
+
+   Returns 0 at once when the Y_found flag (z->B[0]) is clear. Otherwise
+   scans forward from the cursor, replacing every 'Y' with 'y' until the
+   end of the text is reached, and returns 1. */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            /* no 'Y' here: advance one character, or stop at the end */
+            z->c = c;
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+/* english_stem: entry point of the English stemmer.
+
+   Operates on the string held in z, starting from the cursor z->c:
+     1. If exception1 matches, the word is finished.
+     2. Otherwise the word must have at least 3 characters after the cursor
+        (the "hop 3" test); if not, the routine returns 0 with the word
+        untouched.
+     3. prelude and mark_regions run forward; the direction is then switched
+        to backward (z->lb = cursor, cursor = z->l).
+     4. Step_1a runs; unless exception2 matches, Step_1b, Step_1c, Step_2,
+        Step_3, Step_4 and Step_5 run in that order. Each is wrapped in a
+        "do", so a failing step only has its cursor movement undone.
+     5. The cursor returns to z->lb and postlude runs forward.
+   Returns 1 in every case except the short-word exit in step 2. */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb; /* back to forward mode: cursor returns to the start */
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/* Create the environment for the English stemmer: no string variables,
+   two integers (I[0], I[1]) and one boolean (B[0]). */
+extern struct SN_env * english_create_env(void)
+{
+    return SN_create_env(0, 2, 1);
+}
+
+/* Release an environment created by english_create_env(). */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Allocate and release the environment used by the English stemmer. */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/* Stem the word currently held in z; the result replaces it in z. */
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+/* INT_MAX and INT_MIN, used for MAXINT and MININT below. */
+#include <limits.h>
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Size in bytes of two ints, the header words addressed by SIZE and
+   CAPACITY below. Parenthesised so the macro expands correctly inside
+   larger expressions (e.g. after '/' or '%'). */
+#define HEAD (2*sizeof(int))
+
+/* The two int header words sit immediately before the symbols of a string:
+   index -1 holds its size and index -2 its capacity. */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) ((int *)(p))[-1] = n
+#define CAPACITY(p)    ((int *)(p))[-2]
+
+/* One entry of a lookup table passed to find_among() / find_among_b().
+   The generated stemmers initialise entries positionally as
+   { s_size, s, substring_i, result, function }. */
+struct among
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    /* optional routine attached to the entry; 0 in every table visible
+       here. NOTE(review): presumably called by find_among as an extra
+       condition - confirm in utilities.c */
+    int (* function)(struct SN_env *);
+};
+
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int russian_stem(struct SN_env * z);
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+/* Suffix table searched with find_among_b() by r_perfective_gerund. Each
+   entry is { length, symbols, substring_i, result, function }: substring_i
+   links to the entry to try next (-1 for none) and result selects the switch
+   case in the caller.
+   NOTE(review): the numeric symbol values look like KOI8-R byte codes --
+   confirm against the Snowball source. */
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+/* Suffix table searched with find_among_b() by r_adjective; every entry has
+   result 1 (delete) and no substring link. */
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+/* Suffix table searched with find_among_b() inside the optional part of
+   r_adjectival; result 1 entries need a preceding s_2/s_3 symbol there,
+   result 2 entries are deleted unconditionally. */
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+/* Two-entry suffix table searched with find_among_b() by r_reflexive. */
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+/* Suffix table searched with find_among_b() by r_verb; result 1 entries need
+   a preceding s_4/s_5 symbol there, result 2 entries are deleted
+   unconditionally. */
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+/* Suffix table searched with find_among_b() by r_noun; every entry has
+   result 1 (delete). */
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+/* Two-entry suffix table searched with find_among_b() by r_derivational. */
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+/* Suffix table searched with find_among_b() by r_tidy_up; results 1, 2 and 3
+   select its three switch cases. */
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+static unsigned char g_v[] = { 35, 130, 34, 18 }; /* grouping bitmap used with min 192, max 220: bits set for 192, 193, 197, 201, 207, 209, 213, 217, 220 */
+/* Single-symbol literals compared with eq_s_b() by the routines below. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the two region marks. I[0] (pV) is set just past the first symbol
+   that belongs to g_v; I[1] (p2) is set after a further out-of-group,
+   in-group, out-of-group sequence. Either mark keeps its initial value z->l
+   when the scan reaches the end of the input first. The cursor is restored
+   before returning; the result is always 1. */
+static int r_mark_regions(struct SN_env * z) {
+    int start = z->c; /* do, line 100 */
+
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+
+    /* gopast, line 101 */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto finished;
+        z->c++;
+    }
+    z->I[0] = z->c; /* setmark pV, line 101 */
+
+    /* gopast, line 101 */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto finished;
+        z->c++;
+    }
+    /* gopast, line 102 */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto finished;
+        z->c++;
+    }
+    /* gopast, line 102 */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto finished;
+        z->c++;
+    }
+    z->I[1] = z->c; /* setmark p2, line 102 */
+
+finished:
+    z->c = start;
+    return 1;
+}
+
+/* R2 test: returns 1 when the cursor is at or beyond mark I[1], else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/* Match a suffix from a_0 ending at the cursor and delete it. Result-1
+   suffixes are deleted only when the symbol before them is s_0 or s_1;
+   result-2 suffixes are deleted unconditionally. Returns 0 when no suffix
+   matches or the required preceding symbol is missing, 1 otherwise. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 111 */
+    among_var = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 111 */
+
+    if (among_var == 1) {
+        int mark = z->l - z->c; /* or, line 115 */
+        if (!eq_s_b(z, 1, s_0)) {
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z); /* delete, line 115 */
+    } else if (among_var == 2) {
+        slice_del(z); /* delete, line 122 */
+    }
+    return 1;
+}
+
+/* Match a suffix from a_1 ending at the cursor and delete it. Returns 0 when
+   nothing matches, 1 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 127 */
+    among_var = find_among_b(z, a_1, 26); /* substring, line 127 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 127 */
+
+    if (among_var == 1) slice_del(z); /* delete, line 136 */
+    return 1;
+}
+
+/* Remove an adjective ending (r_adjective must succeed, otherwise 0 is
+   returned) and then optionally a further suffix from a_2. Result-1 entries
+   of a_2 are deleted only when preceded by s_2 or s_3; result-2 entries are
+   deleted unconditionally. When the optional part fails, the cursor is put
+   back where it was before the attempt. Returns 1 once the adjective ending
+   has been removed.
+
+   The two saved cursor marks have distinct names: previously both were called
+   "m", so the inner one shadowed the outer one and the failure path of the
+   optional part restored the cursor to the inner mark instead of the position
+   before the attempt. */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m_keep = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m_keep; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m_keep; goto lab0; }
+            case 1:
+                {   int m1 = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m1;
+                    /* neither alternative matched: abandon the whole try */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_keep; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Match a suffix from a_3 ending at the cursor and delete it. Returns 0 when
+   nothing matches, 1 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 168 */
+    among_var = find_among_b(z, a_3, 2); /* substring, line 168 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 168 */
+
+    if (among_var == 1) slice_del(z); /* delete, line 171 */
+    return 1;
+}
+
+/* Match a suffix from a_4 ending at the cursor and delete it. Result-1
+   suffixes are deleted only when the symbol before them is s_4 or s_5;
+   result-2 suffixes are deleted unconditionally. Returns 0 when no suffix
+   matches or the required preceding symbol is missing, 1 otherwise. */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 176 */
+    among_var = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 176 */
+
+    if (among_var == 1) {
+        int mark = z->l - z->c; /* or, line 182 */
+        if (!eq_s_b(z, 1, s_4)) {
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z); /* delete, line 182 */
+    } else if (among_var == 2) {
+        slice_del(z); /* delete, line 190 */
+    }
+    return 1;
+}
+
+/* Match a suffix from a_5 ending at the cursor and delete it. Returns 0 when
+   nothing matches, 1 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 199 */
+    among_var = find_among_b(z, a_5, 36); /* substring, line 199 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 199 */
+
+    if (among_var == 1) slice_del(z); /* delete, line 206 */
+    return 1;
+}
+
+/* Match a suffix from a_6 ending at the cursor and delete it, provided the
+   suffix starts at or after mark I[1] (the r_R2 test). Returns 0 when nothing
+   matches or the R2 test fails, 1 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 215 */
+    among_var = find_among_b(z, a_6, 2); /* substring, line 215 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 215 */
+    if (!r_R2(z)) return 0; /* call R2, line 215 */
+
+    if (among_var == 1) slice_del(z); /* delete, line 218 */
+    return 1;
+}
+
+/* Final clean-up driven by table a_7:
+     result 1 - delete the suffix, then delete one s_7 symbol if it is itself
+                preceded by s_6 (returns 0 if either symbol is missing);
+     result 2 - delete the suffix only when it is preceded by s_8;
+     result 3 - delete the suffix.
+   Returns 0 when nothing matches or a required symbol is missing, 1
+   otherwise. */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c; /* [, line 223 */
+    among_var = find_among_b(z, a_7, 4); /* substring, line 223 */
+    if (among_var == 0) return 0;
+    z->bra = z->c; /* ], line 223 */
+
+    if (among_var == 1) {
+        slice_del(z); /* delete, line 227 */
+        z->ket = z->c; /* [, line 228 */
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c; /* ], line 228 */
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z); /* delete, line 228 */
+    } else if (among_var == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z); /* delete, line 231 */
+    } else if (among_var == 3) {
+        slice_del(z); /* delete, line 233 */
+    }
+    return 1;
+}
+
+extern int russian_stem(struct SN_env * z) { /* Stemmer entry point: marks the regions, then works
+       backwards from the end of the input removing endings in place.
+       Returns 0 only when the cursor would lie before mark I[0]; otherwise 1. */
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+    /* From here on the cursor starts at z->l and the backward (_b) routines
+       stop at z->lb. */
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        m3 = z->lb; z->lb = z->c; /* save the old limit; backward matching now stops at I[0] */
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            z->c = z->l - m; /* "do" restores the cursor whether or not an ending was removed */
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3; /* restore the limit saved above */
+    }
+    z->c = z->lb; /* leave backward mode with the cursor at the limit */
+    return 1;
+}
+
+/* Create a stemmer environment via SN_create_env(0, 2, 0).
+   NOTE(review): the 2 presumably sizes the I[] array used as I[0]/I[1] by
+   this stemmer -- confirm against SN_create_env. */
+extern struct SN_env * russian_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 0);
+    return env;
+}
+
+/* Release an environment created by russian_create_env() by handing it to
+   SN_close_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+#define unless(C) if(!(C))
+
+#define CREATE_SIZE 1
+
+/* Allocate a symbol buffer: HEAD bytes of header followed by room for
+   CREATE_SIZE + 1 symbols. CAPACITY and the size field are both initialised
+   to CREATE_SIZE. Returns a pointer just past the header, or NULL when
+   malloc() fails (previously the null result was offset by HEAD and written
+   through). Release the buffer with lose_s(). */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL) return NULL;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Free a buffer obtained from create_s() or increase_size(). p points just
+   past the HEAD-byte header, so the header offset is removed before calling
+   free(). A NULL pointer is ignored, as free() itself would; without the
+   check the offset would be subtracted from NULL and an invalid pointer
+   passed to free(). */
+extern void lose_s(symbol * p)
+{   if (p == NULL) return;
+    free((char *) p - HEAD);
+}
+
+/* Forward grouping test. Succeeds (returns 1 and advances the cursor) when
+   the symbol at the cursor lies in min..max and its bit, indexed by
+   (symbol - min), is set in bitmap s. Returns 0 without moving at the end of
+   input or when the symbol is not in the set. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch, bit;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max || ch < min) return 0;
+    bit = ch - min;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward grouping test. Succeeds (returns 1 and moves the cursor back one)
+   when the symbol before the cursor lies in min..max and its bit, indexed by
+   (symbol - min), is set in bitmap s. Returns 0 without moving at the
+   backward limit z->lb or when the symbol is not in the set. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch, bit;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max || ch < min) return 0;
+    bit = ch - min;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward complement of in_grouping. Succeeds (returns 1 and advances the
+   cursor) when the symbol at the cursor is outside min..max or its bit in
+   bitmap s is clear. Returns 0 without moving at the end of input or when the
+   symbol is in the set. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max && ch >= min) {
+        int bit = ch - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward complement of in_grouping_b. Succeeds (returns 1 and moves the
+   cursor back one) when the symbol before the cursor is outside min..max or
+   its bit in bitmap s is clear. Returns 0 without moving at the backward
+   limit z->lb or when the symbol is in the set. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max && ch >= min) {
+        int bit = ch - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward range test: returns 1 and advances the cursor when the symbol at
+   the cursor lies in min..max; returns 0 without moving otherwise or at the
+   end of input. */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (!(ch <= max && ch >= min)) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward range test: returns 1 and moves the cursor back one when the
+   symbol before the cursor lies in min..max; returns 0 without moving
+   otherwise or at the backward limit z->lb. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (!(ch <= max && ch >= min)) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward complement of in_range: returns 1 and advances the cursor when the
+   symbol at the cursor lies outside min..max; returns 0 without moving
+   otherwise or at the end of input. */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max && ch >= min) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward complement of in_range_b: returns 1 and moves the cursor back one
+   when the symbol before the cursor lies outside min..max; returns 0 without
+   moving otherwise or at the backward limit z->lb. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max && ch >= min) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward literal match: if the s_size symbols at s equal the text starting
+   at the cursor, advance the cursor past them and return 1; otherwise return
+   0 and leave the cursor alone. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    int available = z->l - z->c;
+
+    if (available < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward literal match: if the s_size symbols at s equal the text ending at
+   the cursor (without crossing z->lb), move the cursor back over them and
+   return 1; otherwise return 0 and leave the cursor alone. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    int available = z->c - z->lb;
+
+    if (available < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward match against the buffer p, whose length is taken from SIZE(p). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int p_len = SIZE(p);
+    return eq_s(z, p_len, p);
+}
+
+/* Backward match against the buffer p, whose length is taken from SIZE(p). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int p_len = SIZE(p);
+    return eq_s_b(z, p_len, p);
+}
+
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{   /* Look up the text starting at the cursor in the table v (v_size entries).
+       Phase 1 is a binary search that narrows [i, j) while remembering how
+       many leading symbols are already known to match at each end (common_i,
+       common_j); it presumably relies on v being sorted -- confirm against the
+       table generator. Phase 2 walks from entry i along the substring_i links
+       to the first entry that matched in full. On success the cursor is left
+       just past the match and the entry's result is returned; 0 is returned
+       when the chain ends without a match. */
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {   int i; for (i = common; i < w->s_size; i++) /* this i shadows the outer search index */
+            {   if (c + common == l) { diff = -1; break; } /* ran out of input: treat as "less than" */
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size) /* the whole of this entry's string matched */
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c + w->s_size; /* reset the cursor after the callback */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i; /* fall back to the linked entry */
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{   /* Backward counterpart of find_among: entries are compared from their
+       last symbol towards their first against the text ending at the cursor,
+       never reading at or before z->lb. On success the cursor is left at the
+       start of the matched text and the entry's result is returned; 0 is
+       returned when nothing matches. */
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--) /* this i shadows the outer search index */
+            {   if (c - common == lb) { diff = -1; break; } /* reached the backward limit */
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size) /* the whole of this entry's string matched */
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c - w->s_size; /* reset the cursor after the callback */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i; /* fall back to the linked entry */
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Replace buffer p by a newly malloc'ed one of capacity n + 20, copying
+   CAPACITY(p) symbols across and releasing the old buffer with lose_s().
+   Only the capacity field of the new buffer is set here; the callers in this
+   file call SET_SIZE() on the result afterwards.
+   NOTE(review): the malloc() result is not checked. */
+extern symbol * increase_size(symbol * p, int n)
+{
+    int capacity = n + 20;
+    char * raw = (char *) malloc(HEAD + (capacity + 1) * sizeof(symbol));
+    symbol * grown = (symbol *) (HEAD + raw);
+
+    CAPACITY(grown) = capacity;
+    memmove(grown, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return grown;
+}
+
+/* Replace the symbols of z->p between c_bra and c_ket by the s_size symbols
+   at s. When the length changes, the buffer is grown if necessary, the tail
+   is shifted, and z->l and the stored size are updated; a cursor at or after
+   c_ket moves with the tail, and a cursor strictly inside the replaced span
+   is moved to c_bra. Returns the change in length. */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{
+    int old_len = SIZE(z->p);
+    int adjustment = s_size - (c_ket - c_bra);
+
+    if (adjustment != 0) {
+        int new_len = adjustment + old_len;
+
+        if (new_len > CAPACITY(z->p)) z->p = increase_size(z->p, new_len);
+        memmove(z->p + c_ket + adjustment, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += adjustment;
+        if (z->c >= c_ket) {
+            z->c += adjustment;
+        } else if (z->c > c_bra) {
+            z->c = c_bra;
+        }
+    }
+    if (s_size != 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return adjustment;
+}
+
+/* Sanity-check the slice markers: 0 <= bra <= ket <= l <= SIZE(p). On
+   violation, print a message to stderr, dump the buffer with debug() and
+   terminate the process with exit(1). */
+static void slice_check(struct SN_env * z)
+{
+    int ok = 0 <= z->bra &&
+             z->bra <= z->ket &&
+             z->ket <= z->l &&
+             z->l <= SIZE(z->p);   /* this last test could be removed */
+
+    if (ok) return;
+
+    fprintf(stderr, "faulty slice operation:\n");
+    debug(z, -1, 0);
+    exit(1);
+}
+
+/* Replace the current slice (z->bra .. z->ket) by the s_size symbols at s,
+   after validating the slice markers. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the contents of buffer p (length SIZE(p)). */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int p_len = SIZE(p);
+    slice_from_s(z, p_len, p);
+}
+
+/* Delete the current slice by replacing it with an empty string. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, (symbol *) 0);
+}
+
+/* Replace bra..ket by the s_size symbols at s, then shift z->bra and z->ket
+   by the length change if they lie at or after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* As insert_s, with the replacement text taken from buffer p (length
+   SIZE(p)). */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice (z->bra .. z->ket) into buffer p, growing p if its
+   capacity is too small, and set p's size to the slice length. Returns p,
+   which may be a new buffer. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int slice_len;
+
+    slice_check(z);
+    slice_len = z->ket - z->bra;
+    if (slice_len > CAPACITY(p)) p = increase_size(p, slice_len);
+    memmove(p, z->p + z->bra, slice_len * sizeof(symbol));
+    SET_SIZE(p, slice_len);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into buffer p, growing p if its
+   capacity is too small, and set p's size to z->l. Returns p, which may be a
+   new buffer. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int text_len = z->l;
+
+    if (text_len > CAPACITY(p)) p = increase_size(p, text_len);
+    memmove(p, z->p, text_len * sizeof(symbol));
+    SET_SIZE(p, text_len);
+    return p;
+}
+
+/* Dump the buffer to stdout with position markers: '{' at z->lb, '[' at
+   z->bra, '|' at the cursor, ']' at z->ket and '}' at z->l. Zero symbols are
+   shown as '#'. A header with number, line_count and the buffer size is
+   printed only when number >= 0. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{
+    int pos;
+    int size = SIZE(z->p);
+
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,size);
+    for (pos = 0; pos <= size; pos++) {
+        if (pos == z->lb) printf("{");
+        if (pos == z->bra) printf("[");
+        if (pos == z->c) printf("|");
+        if (pos == z->ket) printf("]");
+        if (pos == z->l) printf("}");
+        if (pos < size) {
+            int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case the NUL-terminated string str in place with tolower() and
+ * return str itself.
+ */
+char*
+lowerstr(char *str) {
+   unsigned char *cur;
+
+   /* walk the bytes as unsigned char so tolower() never sees a negative value */
+   for(cur=(unsigned char*)str; *cur; cur++)
+       *cur = tolower(*cur);
+   return str;
+}
+
+/*
+ * Release every word held by the stop list, then the pointer array itself,
+ * and reset the whole structure to zeroes.
+ */
+void
+freestoplist(StopList *s) {
+   if ( s->stop ) {
+       int i;
+
+       /* only the first s->len slots were filled; the array has no NULL terminator */
+       for(i=0; i<s->len; i++)
+           if ( s->stop[i] )
+               free(s->stop[i]);
+
+       /* the array must be released exactly once, after all words are gone */
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Load a stop-word file into *s.
+ *
+ * "in" holds the file name; when it is NULL or empty the list is left empty
+ * (s->len = 0, s->stop = NULL).  Each non-blank line becomes one malloc'ed
+ * entry; if s->wordop is set it is applied to every entry.  Failure to open
+ * the file or to allocate memory is reported with elog(ERROR); on the
+ * out-of-memory path everything allocated so far is released first.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+       int nomem=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           int linelen=strlen(buf);
+
+           /*
+            * Strip only real line terminators: the last line of a file may
+            * lack a newline and must not lose its final character.
+            */
+           while( linelen>0 && ( buf[linelen-1]=='\n' || buf[linelen-1]=='\r' ) )
+               buf[--linelen] = '\0';
+           if ( linelen==0 ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /* the old array is still valid and is released below */
+                   nomem=1;
+                   break;
+               }
+               stop=tmp;
+           }
+
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               nomem=1;
+               break;
+           }
+           if ( s->wordop )
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++;
+       }
+       fclose(hin);
+       pfree(filename);
+
+       if ( nomem ) {
+           /* the words live only in the local array so far: free them here */
+           while( s->len>0 )
+               free(stop[--(s->len)]);
+           if ( stop )
+               free(stop);
+           s->stop=NULL;
+           elog(ERROR,"Not enough memory");
+       }
+   }
+   s->stop=stop;
+}
+
+/* qsort()/bsearch() callback: a and b point at char* elements, compared with strcmp(). */
+static int
+comparestr(const void *a, const void *b) {
+   char *left  = *(char**)a;
+   char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/* Sort the stop words with comparestr() so the list can be searched with bsearch(). */
+void
+sortstoplist(StopList *s) {
+   /* nothing to sort for a missing or empty list */
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is found in the stop list by binary search.
+ * If s->wordop is set it is applied to key first, even when the list is empty.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop )
+       key=(*(s->wordop))(key);
+
+   if ( !s->stop || s->len<=0 )
+       return false;
+
+   if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
+       return true;
+   return false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <locale.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the tsearch configuration stored under oid "id".
+ *
+ * Two saved SPI plans are used: the first fetches the parser name from
+ * pg_ts_cfg, the second fetches, for every token type of that parser, the
+ * array of dictionary names mapped to it.  cfg->map is a malloc'ed array
+ * indexed by token type id.  Dictionary names are copied out of SPI memory
+ * into TopMemoryContext and converted to oids with name2id_dict() only after
+ * SPI_finish() has been called.
+ *
+ * All failures are reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       /* a NULL parser name cannot be duplicated or looked up */
+       if ( isnull )
+           ts_error(ERROR, "Null parser name for tsearch cfg with id %u", id);
+       /* the copy must survive SPI_finish(), so make it outside SPI memory */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %u", id);
+
+   /* second plan takes (parser name, cfg oid) */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %u", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       if ( !cfg->map ) {
+           /* rows are ordered by tokid desc, so the first row has the largest id */
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull now refers to the dict_name column */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* keep copies of the names for now; they are turned into oids below */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       /* free the detoasted copy, if detoasting made one */
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every saved dictionary name by the dictionary's oid */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+typedef struct {   /* per-backend cache of loaded configurations */
+   TSCfgInfo   *last_cfg;   /* entry most recently returned by findcfg(), or NULL */
+   int     len;   /* number of used entries in list */
+   int     reallen;   /* number of allocated entries in list */
+   TSCfgInfo   *list;   /* realloc'ed array, kept sorted by id with comparecfg() */
+   SNMap       name2id_map;   /* name -> oid cache filled by name2id_cfg() */
+} CFGList;
+
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name->oid map, every per-token
+ * dictionary array, every map and the list itself, then zero CList.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               /* map has CList.list[i].len slots, one per token type id */
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() callback ordering TSCfgInfo entries by id.
+ * The ids are compared rather than subtracted: Oid is unsigned, so a
+ * difference converted to int can come out with the wrong sign.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached configuration with the given oid, loading it with
+ * init_cfg() on first use.
+ *
+ * The returned pointer refers to an element of CList.list; a later call that
+ * adds a new configuration may realloc and re-sort that array.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo key;
+   TSCfgInfo newcfg;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance */
+   CList.last_cfg=NULL;
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+
+   /*
+    * Load into a local first: if init_cfg() raises an error, neither the
+    * list nor last_cfg may refer to a half-initialised entry.
+    */
+   init_cfg(id, &newcfg);
+   CList.list[CList.len]=newcfg;
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+
+   /* qsort changed the order, so look the new entry up again */
+   key.id=id;
+   CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return CList.last_cfg;
+}
+
+
+/*
+ * Translate a configuration name into its oid in pg_ts_cfg.
+ * The name cache in CList is consulted first; a value found through SPI is
+ * added to that cache.  Errors are raised with elog(ERROR).
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid     argtypes[1]={ TEXTOID };
+   Datum   values[1]={ PointerGetDatum(name) };
+   bool    isnull;
+   int     rc;
+   Oid     cfgid;
+
+   /* cached answer, if any */
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid )
+       return cfgid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !( SPI_processed > 0 ) )
+       elog(ERROR, "No tsearch config");
+
+   cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull )
+       elog(ERROR, "Null id for tsearch config");
+
+   SPI_finish();
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Split buf (buflen bytes) into lexemes with the parser of cfg and append
+ * the normalized forms to prs->words.
+ *
+ * For every token the dictionaries mapped to its type are tried in order;
+ * the first one that returns a non-NULL result wins.  Each recognised token
+ * advances prs->pos, even when the dictionary returns an empty list.  The
+ * strings returned by the dictionary are stored in prs->words without
+ * copying.  A token of MAXSTRLEN bytes or more raises an error.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* the length is an integer argument, so pass it with Int32GetDatum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           /* only the array is freed; the strings now belong to prs->words */
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token (buflen bytes at buf, of the given parser type) to
+ * prs->words, doubling the array until there is room.  The text is copied
+ * into freshly palloc'ed memory; all other fields start out as zero.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* take the slot address only after the array has stopped moving */
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;
+}
+
+/*
+ * Link the most recently added word of prs with every query item whose
+ * operand equals the normalized form buf (buflen bytes).
+ *
+ * The first matching item is stored in the word itself.  Each further match
+ * appends a copy of the word that is flagged "repeated" and points at that
+ * item.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* reserve room for the worst case of one extra entry per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* repalloc may have moved the array, so take the address only now */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Parse buf (buflen bytes) for headline generation.
+ *
+ * Every token produced by the parser of cfg is appended to prs->words with
+ * hladdword().  Tokens whose type has dictionaries mapped are normalized by
+ * the first dictionary that returns a non-NULL result, and each normalized
+ * form is matched against the query with hlfinditem().  A token of MAXSTRLEN
+ * bytes or more raises an error.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       /* the raw token is always recorded, whatever its type */
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* the length is an integer argument, so pass it with Int32GetDatum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               /* the normalized form is only needed for the comparison */
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Assemble the headline text from the words collected in prs.
+ *
+ * Only words flagged "in" and neither "skip" nor "repeated" are emitted.
+ * A word flagged "replace" is written as a single blank; a "selected" word
+ * is wrapped in prs->startsel / prs->stopsel.  The text of every
+ * non-repeated word is pfree'd along the way.  Returns a palloc'ed text
+ * value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int     buflen=128;
+   text    *out=(text*)palloc( buflen );
+   char    *cur=((char*)out) + VARHDRSZ;
+   int     i;
+
+   for(i=0; i<prs->curwords; i++) {
+       HLWORD  *wrd=prs->words + i;
+
+       /* make sure the word and both selection markers fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (cur - ((char*)out)) >= buflen ) {
+           int used = cur - ((char*)out);
+           buflen*= 2;
+           out = (text *) repalloc(out, buflen);
+           cur=((char*)out) + used;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace )
+               *cur++=' ';
+           else {
+               if (wrd->selected) {
+                   memcpy(cur,prs->startsel,prs->startsellen);
+                   cur+=prs->startsellen;
+               }
+               memcpy(cur,wrd->word,wrd->len);
+               cur+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(cur,prs->stopsel,prs->stopsellen);
+                   cur+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* a repeated entry shares its text with the entry it was copied from */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+   }
+
+   VARATT_SIZEP(out)=cur - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the oid of the current configuration.
+ * If none has been chosen yet, the configuration whose locale column equals
+ * the current LC_CTYPE locale is looked up in pg_ts_cfg and remembered.
+ * An error is raised when no such row exists.
+ */
+int  
+get_currcfg(void) {
+   Oid     argtypes[1]={ TEXTOID };
+   Datum   values[1];
+   const char *locname;
+   bool    isnull;
+   int     rc;
+
+   /* already decided in this backend */
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( plan_getcfg_bylocale == NULL ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( plan_getcfg_bylocale == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* a NULL second argument makes setlocale() report without changing anything */
+   locname = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)locname) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !( SPI_processed > 0 ) )
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current one.
+ * findcfg() is called first, so a configuration that cannot be loaded raises
+ * an error before current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current one.
+ * The name is resolved with name2id_cfg() and the oid handed to set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text    *name=PG_GETARG_TEXT_P(0);
+   Datum   cfgid=ObjectIdGetDatum( name2id_cfg(name) );
+
+   DirectFunctionCall1( set_curcfg, cfgid );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* SQL-callable: return the oid reported by get_currcfg(). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   int cfgid=get_currcfg();
+
+   PG_RETURN_OID( cfgid );
+}
+
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned"); /* NOTE(review): nothing is reset here directly; presumably ts_error() clears the caches before reporting - confirm in its definition */
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+typedef struct {   /* dictionaries mapped to one token type */
+   int len;   /* number of entries in dict_id */
+   Datum   *dict_id;   /* malloc'ed array; holds dictionary oids once init_cfg() has finished */
+} ListDictionary;
+
+typedef struct {   /* one loaded tsearch configuration */
+   Oid id;   /* oid of the pg_ts_cfg row */
+   Oid prs_id;   /* parser id as returned by name2id_prs() */
+   int len;   /* number of slots in map (largest token type id + 1) */
+   ListDictionary  *map;   /* malloc'ed array indexed by token type id */
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+typedef struct {   /* one normalized word produced by parsetext_v2() */
+        uint16          len;   /* strlen of word */
+   union {
+       uint16      pos;   /* single position, set from LIMITPOS(prs->pos) */
+       uint16      *apos;   /* NOTE(review): presumably an array of positions used with alen - confirm against the tsvector code */
+   } pos;
+        char       *word;   /* string returned by the dictionary (not copied) */
+   uint32  alen;   /* set to 0 by parsetext_v2() */
+}       WORD;
+   
+typedef struct {   /* growing output buffer of parsetext_v2() */
+        WORD       *words;   /* palloc'ed array, doubled with repalloc when full */
+        int4            lenwords;   /* allocated entries in words */
+        int4            curwords;   /* used entries in words */
+   int4        pos;   /* running counter, incremented for each recognised token */
+}       PRSTEXT;
+
+typedef struct {   /* one token of the text being turned into a headline */
+        uint16    len;   /* length of word in bytes */
+   uint8    selected:1,   /* genhl() wraps the word in startsel/stopsel */
+         in:1,   /* genhl() emits the word only when this is set */
+         skip:1,   /* genhl() omits the word */
+         replace:1,   /* genhl() writes a single blank instead of the word */
+         repeated:1;   /* extra copy made by hlfinditem(); shares word with the original */
+   uint8   type;   /* token type reported by the parser */
+        char      *word;   /* palloc'ed copy of the token text */
+   ITEM      *item;   /* matching query item, or NULL */
+}       HLWORD;
+   
+typedef struct {   /* state shared by hlparsetext() and genhl() */
+        HLWORD       *words;   /* palloc'ed array, doubled with repalloc when full */
+        int4            lenwords;   /* allocated entries in words */
+        int4            curwords;   /* used entries in words */
+        char           *startsel;   /* text written before a selected word */
+        char            *stopsel;   /* text written after a selected word */
+        int2            startsellen;   /* length of startsel in bytes */
+        int2            stopsellen;   /* length of stopsel in bytes */
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum tsstat_in(PG_FUNCTION_ARGS);
+
+/*
+ * Input function for tsstat.  The argument is never read: the result is
+ * always a freshly palloc'ed, header-only value (len = STATHDRSIZE,
+ * size = 0), i.e. empty statistics.
+ */
+Datum
+tsstat_in(PG_FUNCTION_ARGS)
+{
+   tsstat  *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum tsstat_out(PG_FUNCTION_ARGS);
+
+/*
+ * Output function for tsstat.  Not implemented: every call raises
+ * ERROR "Unimplemented".
+ */
+Datum
+tsstat_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Unimplemented");
+
+   /* only reached if elog(ERROR) were to return */
+   PG_RETURN_NULL();
+}
+
+/*
+ * Create or grow an array of WordEntry pointers.
+ * A NULL array or a zero *len yields a new 8-slot array; otherwise *len is
+ * doubled and the array is repalloc'ed.  *len is updated to the new
+ * capacity and the (possibly moved) array is returned.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len )
+{
+   bool    fresh = ( in==NULL || *len==0 );
+
+   *len = fresh ? 8 : (*len) * 2;
+
+   if ( fresh )
+       return palloc( sizeof(WordEntry*) * (*len) );
+   return repalloc( in, sizeof(WordEntry*) * (*len) );
+}
+
+/*
+ * Order a statistics entry against a tsvector word: a shorter word sorts
+ * first; words of equal length are ordered by strncmp() on their bytes.
+ * Returns a negative, zero or positive value accordingly.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt)
+{
+   const char  *statword;
+   const char  *txtword;
+
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   statword = STATSTRPTR(stat) + a->pos;
+   txtword = STRPTR(txt) + b->pos;
+   return strncmp(statword, txtword, a->len);
+}
+
+/*
+ * Build a new tsstat containing every entry of 'stat' plus the 'len' words
+ * of 'txt' listed in entry[0..len).
+ *
+ * The old string area is copied verbatim to the start of the new string
+ * area, so the 'pos' offsets of existing entries stay valid; the bytes of
+ * the new words are appended after it.  Each new entry starts with ndoc = 1
+ * and nentry = POSDATALEN() of the word (1 when that is 0).
+ *
+ * The entry array of the result is produced by merging, so this assumes
+ * both stat's entries and entry[] are already ordered by compareStatWord()
+ * and that none of the new words is present in 'stat' (the caller, ts_accum,
+ * only passes words it did not find).
+ *
+ * Returns the newly palloc'ed tsstat; 'stat' is left untouched.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* string bytes required by the new words */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings keep their offsets; new strings go after them */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary-search its insertion point */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       /* entries before the insertion point, the new entry, then the rest */
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries (possibly nothing) ... */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /*
+        * ... or of the new words.  At most one of the two tails is
+        * non-empty; when old entries remained this loop does not run, so
+        * nptr is not used after the memcpy above.
+        */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+/*
+ * Accumulate the words of one tsvector (argument 1) into the statistics
+ * passed as argument 0.
+ *
+ * Words already present in the statistics are updated in place: ndoc is
+ * incremented and nentry grows by POSDATALEN() of the word (by 1 when that
+ * is 0).  Words not found are collected in 'newentry' and added by
+ * formstat(), which builds a new tsstat.
+ *
+ * Returns the original 'stat' pointer when no new word was seen, otherwise
+ * the new tsstat; the old one is not freed here (see the commented-out
+ * pfree at the end).
+ *
+ * Both lookup strategies rely on the entries of 'stat' and the words of
+ * 'txt' being ordered by compareStatWord() -- assumption, confirm against
+ * the tsvector construction code.
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   /* NOTE(review): argument 1 is detoasted before the NULL check below */
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Strategy choice: walk both arrays in parallel unless the statistics
+    * hold at least 100 times as many entries as the tsvector has words, in
+    * which case each word is looked up by binary search instead.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word sorts before the current entry: it is new */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* statistics exhausted: every remaining word is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   /* leaves StopLow < StopHigh, which marks "found" below */
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   /* newentry[] points into txt, so txt must stay alive until formstat() is done */
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * State kept between calls of the set-returning functions in this file:
+ * 'stat' is a private copy of the statistics and 'cur' the index of the
+ * next entry to return.
+ * NOTE(review): 'stat' is declared as tsvector* although ts_setup_firstcall()
+ * stores a copied tsstat in it; it is only accessed through ->size and the
+ * STATPTR/STATSTRPTR macros.  Consider declaring it tsstat*.
+ */
+typedef struct {
+   uint32  cur;
+   tsvector *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions of this file.
+ * While switched to funcctx->multi_call_memory_ctx it stores a StatStorage
+ * holding a byte copy of 'stat' (stat->len bytes) in funcctx->user_fctx and
+ * prepares the slot and attinmeta for rows of relation type "statinfo".
+ * The previous memory context is restored before returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat)
+{
+   MemoryContext   callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   StatStorage     *storage = palloc( sizeof(StatStorage) );
+   TupleDesc       rowdesc;
+
+   /* iteration state: start at entry 0 of a private copy */
+   storage->cur = 0;
+   storage->stat = palloc( stat->len );
+   memcpy(storage->stat, stat, stat->len);
+   funcctx->user_fctx = (void*) storage;
+
+   /* result row description */
+   rowdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(callerctx);
+}
+
+
+/*
+ * Produce the next result row from the StatStorage kept in
+ * funcctx->user_fctx.
+ *
+ * While entries remain, returns a tuple datum built from three C strings
+ * (the word, ndoc, nentry) and advances the cursor.  Once every entry has
+ * been returned, frees the stored copy and the StatStorage and returns
+ * (Datum)0, which the callers treat as "no more rows".
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx)
+{
+   StatStorage *st = (StatStorage*) funcctx->user_fctx;
+   StatEntry   *entry;
+   HeapTuple   tuple;
+   Datum       result;
+   char        *values[3];
+   char        ndoc[16];
+   char        nentry[16];
+
+   if ( !(st->cur < st->stat->size) ) {
+       /* exhausted: release the per-query state */
+       pfree(st->stat);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = STATPTR(st->stat) + st->cur;
+
+   /* column 0: the word, copied out and NUL-terminated */
+   values[0] = palloc( entry->len+1 );
+   memcpy( values[0], STATSTRPTR(st->stat)+entry->pos, entry->len);
+   (values[0])[entry->len] = '\0';
+
+   /* columns 1 and 2: the counters rendered as text */
+   sprintf(ndoc,"%d",entry->ndoc);
+   values[1] = ndoc;
+   sprintf(nentry,"%d",entry->nentry);
+   values[2] = nentry;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[0]);
+   st->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum ts_accum_finish(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function over a tsstat (argument 0): the first call copies
+ * the statistics via ts_setup_firstcall(); every call then returns one row
+ * from ts_process_call() until it yields (Datum)0.
+ */
+Datum
+ts_accum_finish(PG_FUNCTION_ARGS)
+{
+   FuncCallContext *funcctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(funcctx);
+   if ( row != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, row);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Cached OID of the tsvector type; InvalidOid until get_ti_Oid() has run */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the OID of the type named 'tsvector' in pg_type through SPI and
+ * store it in tiOid.  Raises ERROR when the query fails, returns no row,
+ * or yields InvalidOid.
+ * Uses SPI_exec, so it has to run inside an SPI connection (in this file
+ * ts_stat() calls SPI_connect() before reaching it).
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is an unsigned row count, so the former "< 0" test could
+    * never fire and an empty result would have been dereferenced below.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL query given in 'txt' and accumulate word statistics over its
+ * result.
+ *
+ * The query must return exactly one column of type tsvector, otherwise an
+ * ERROR is raised.  Rows are read through an SPI cursor in batches of 100;
+ * every non-NULL value is fed to ts_accum().  Returns the resulting tsstat
+ * (header-only when no words were seen).
+ *
+ * Must be called inside an SPI connection.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   /* first batch; also gives access to the result's tuple descriptor */
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from empty statistics */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum returns its input when nothing new was added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum ts_stat(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: argument 0 is the text of an SQL query.  On the
+ * first call the query is run through ts_stat_sql() inside an SPI
+ * connection and the collected statistics are copied by
+ * ts_setup_firstcall(); every call then returns one row from
+ * ts_process_call() until it yields (Datum)0.
+ */
+Datum
+ts_stat(PG_FUNCTION_ARGS)
+{
+   FuncCallContext *funcctx;
+   Datum           row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *sql = PG_GETARG_TEXT_P(0);
+       tsstat  *collected;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+
+       /*
+        * The copy is taken before SPI_finish(), presumably because memory
+        * obtained while connected to SPI does not survive it -- confirm.
+        */
+       SPI_connect();
+       collected = ts_stat_sql(sql);
+       PG_FREE_IF_COPY(sql,0);
+       ts_setup_firstcall(funcctx, collected);
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(funcctx);
+   if ( row != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, row);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one word, as filled in by ts_stat.c:
+ *   len    - length of the word in bytes
+ *   pos    - offset of the word inside the string area (see STATSTRPTR)
+ *   ndoc   - incremented once for every tsvector in which the word was found
+ *   nentry - sum of the per-tsvector position counts (a word without
+ *            positions counts as 1)
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Statistics container:
+ *   len  - total size of the value in bytes
+ *   size - number of StatEntry items
+ *   data - 'size' StatEntry items followed by the word strings
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat.  Every macro argument is parenthesized so that
+ * expressions can be passed safely, and the header fields are read through
+ * tsstat (declared above) rather than through the unrelated tsvector type.
+ */
+/* size of the fixed header: the two int4 fields len and size */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* total bytes needed for x entries plus lenstr bytes of strings */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* first StatEntry of the value x */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* start of the string area, right after the last StatEntry */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* number of bytes in the string area */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary (pg_ts_dict is keyed by dict_name)
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of lexeme positions in the string and their lengths
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>     /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort comparator ordering WordEntryPos items by ascending position.
+ *
+ * Returns 0 when both positions are equal: a comparator that reports
+ * "greater" for equal keys in both argument orders is inconsistent, and
+ * qsort's behaviour with an inconsistent comparator is undefined.
+ */
+static int
+comparePos(const void *a, const void *b) {
+   int pa = ((const WordEntryPos *) a)->pos;
+   int pb = ((const WordEntryPos *) b)->pos;
+
+   if ( pa == pb )
+       return 0;
+   return ( pa > pb ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and collapse duplicates in place.  When the
+ * same position occurs more than once, the largest weight seen is kept.
+ * The scan stops early once MAXNUMPOS items have been kept or an item with
+ * position MAXENTRYPOS-1 has been stored.
+ *
+ * Returns the number of items kept at the front of a[].
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *ptr, *res;
+
+   res=a;
+   /* a single position is trivially sorted and unique */
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   /* res: last kept item; ptr: next candidate */
+   ptr = a + 1;
+   while (ptr - a < l) {
+       if ( ptr->pos != res->pos ) {
+           res++;
+           res->pos = ptr->pos;
+           res->weight = ptr->weight;
+           /* output is full, or the largest representable position was reached */
+           if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
+               break;
+       } else if ( ptr->weight > res->weight )
+           res->weight = ptr->weight;
+       ptr++;
+   }
+   return res + 1 - a;
+}
+
+/* Word buffer indexed by compareentry(); uniqueentry() sets it before calling qsort */
+static char *BufferStr;
+
+/*
+ * qsort comparator for WordEntryIN items: shorter words sort first, words of
+ * equal length are ordered by strncmp over their bytes in BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *ea = (const WordEntryIN *) a;
+   const WordEntryIN *eb = (const WordEntryIN *) b;
+
+   if ( ea->entry.len != eb->entry.len )
+       return ( ea->entry.len > eb->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[ea->entry.pos],
+                  &BufferStr[eb->entry.pos],
+                  ea->entry.len);
+}
+
+/*
+ * Finish one merged entry: de-duplicate its position list (if any) and
+ * return the number of bytes it occupies in the output data area, i.e.
+ * SHORTALIGN(len) plus, when positions exist, (npos + 1) slots of
+ * sizeof(WordEntryPos) -- the extra slot is the uint16 counter kept in pos[0].
+ */
+static int4
+finishentry(WordEntryIN * e)
+{
+   int4 size = SHORTALIGN(e->entry.len);
+
+   if ( e->entry.haspos ) {
+       *(uint16*)(e->pos) = uniquePos( &(e->pos[1]), *(uint16*)(e->pos));
+       size += (*(uint16*)(e->pos) + 1) * sizeof(WordEntryPos);
+   }
+   return size;
+}
+
+/*
+ * Sort the parsed entries, merge entries holding the same word (their
+ * position lists are concatenated) and de-duplicate every position list.
+ *
+ * a          array of l entries, compacted in place
+ * l          number of entries in a (callers pass l >= 1)
+ * buf        buffer holding the word bytes addressed by entry.pos/entry.len
+ * outbuflen  out: exact number of data bytes needed for all resulting
+ *            entries.  Any previous value is discarded; the counter slot of
+ *            each position list is included, matching what tsvector_in
+ *            copies for every entry.
+ *
+ * Returns the number of distinct entries left at the front of a[].
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   res = a;
+   *outbuflen = 0;
+   if (l == 1) {
+       *outbuflen = finishentry(a);
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* a new word starts: account for the finished one and move on */
+           *outbuflen += finishentry(res);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           if ( res->entry.haspos ) {
+               /* same word, both with positions: append ptr's list to res's */
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]),
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* res had no positions yet: take over ptr's list */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+   /* account for the last entry */
+   *outbuflen += finishentry(res);
+
+   return res + 1 - a;
+}
+
+/* States of the gettoken_tsvector() scanner */
+/* looking for the first character of a word */
+#define WAITWORD   1
+/* inside an unquoted word */
+#define WAITENDWORD 2
+/* previous character was a backslash: take the next one literally */
+#define WAITNEXTCHAR   3
+/* inside a quoted word, waiting for the closing quote */
+#define WAITENDCMPLX   4
+/* quoted word finished: a ':' here introduces position info */
+#define WAITPOSINFO    5
+/* expecting the first digit of a position */
+#define INPOSINFO  6
+/* after a position: weight letter, ',' or end of entry */
+#define WAITPOSDELIM   7
+
+/*
+ * Double the word buffer of the local "state" when it has no room for one
+ * more character plus a terminator; curpos is re-based onto the new buffer.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Scan the next entry from state->prsbuf.
+ *
+ * The word is written NUL-terminated into state->word (curpos is left on
+ * the terminator).  If position info follows and oprisdelim is false, the
+ * positions are collected in state->pos (slot 0 is reused as a uint16
+ * counter, real positions start at slot 1) and state->alen is non-zero.
+ *
+ * Returns 1 when an entry was read and 0 at end of input; syntax problems
+ * are reported through elog(ERROR).
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           /* spaces are skipped here; any other character starts a word */
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           /* copy the escaped character verbatim, then resume the saved state */
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               /* delimiter reached: the word is complete (prsbuf is not advanced) */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               /* ':' ends the word; position info follows unless oprisdelim is set */
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               /* closing quote: terminate the word; an empty quoted word is an error */
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit(*(state->prsbuf)) ) {
+               /* allocate the position array on first use, double it when full */
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               /* bump the counter in slot 0 and fill the new last slot */
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           /*
+            * atoi() above already consumed the whole number; remaining digits
+            * are skipped here.  Weight letters map a/A or '*' -> 3, b -> 2,
+            * c -> 1, d -> 0.
+            */
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower(*(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower(*(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower(*(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower(*(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace(*(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit(*(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       /* advance to the next input character */
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type.
+ *
+ * Tokens are read with gettoken_tsvector(); the word bytes are gathered in
+ * a temporary buffer and one WordEntryIN is recorded per token.  The entries
+ * are then sorted/merged by uniqueentry() and packed into the final value:
+ * header, WordEntry array, then for each entry its word bytes
+ * (SHORTALIGNed) followed by its position list when it has one.
+ *
+ * Raises ERROR when a word does not fit the 11-bit length field or an
+ * offset does not fit the 20-bit pos field of WordEntry.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array and the word buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /* entry.pos is a 20-bit field: the largest storable offset is MAXSTRPOS-1 */
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           /* the position array allocated by the scanner now belongs to the entry */
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   /* uniqueentry() reports through buflen how many data bytes are needed */
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /*
+        * Offsets in the final value also span the position lists of the
+        * preceding entries, so the 20-bit limit must be checked again here;
+        * otherwise the bit-field assignment below would silently truncate.
+        */
+       if (cur - STRPTR(in) >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* copy the counter slot together with the positions */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos );
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * Return the size field of a tsvector, i.e. the number of entries it holds.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* remember the count before the detoasted copy (if any) is released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type.
+ *
+ * Every entry is printed as a single-quoted word (embedded quotes are
+ * backslash-escaped), optionally followed by ':' and a comma-separated list
+ * of positions, each with an A/B/C suffix for weights 3/2/1 (weight 0 gets
+ * no suffix).  Entries are separated by one space.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+       /*
+        * Size estimate: quotes and separators, twice the word length (room
+        * for escapes) and 7 bytes per position.
+        */
+       lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+       for (i = 0; i < out->size; i++) {
+               lenbuf += ptr[i].len*2 /*for escape */;
+               if ( ptr[i].haspos )
+                       lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+       }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'')
+           {
+               /* grow the buffer by one byte for the backslash, keeping our offset */
+               int4        pos = curout - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       /* pp counts the positions still to be printed for this entry */
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort comparator for WORD items: shorter words first, then byte order of
+ * the word, then ascending position.
+ *
+ * Two items with the same word and the same position compare equal (0);
+ * reporting -1 for them in both argument orders would make the comparator
+ * inconsistent, which qsort does not tolerate.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int res;
+
+   if ( wa->len != wb->len )
+       return ( wa->len > wb->len ) ? 1 : -1;
+
+   res = strncmp( wa->word, wb->word, wb->len );
+   if ( res != 0 )
+       return res;
+
+   if ( wa->pos.pos == wb->pos.pos )
+       return 0;
+   return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l parsed words and merge duplicates in place.
+ *
+ * For every distinct word the scalar pos.pos is replaced by an allocated
+ * array pos.apos in which apos[0] is the number of positions and apos[1..]
+ * are the positions (clamped with LIMITPOS); alen is the allocated length
+ * of that array.  The word string of each merged duplicate is pfree'd.
+ *
+ * Returns the number of distinct words left at the front of a[].
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   if (l == 1) {
+       /* pos.pos must be read before pos.apos is assigned */
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   /* convert the first word to array form */
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* new distinct word: start a fresh one-element position array */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* duplicate: drop its string; append its position unless the list is capped */
+           pfree(ptr->word);
+           if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
+               res->pos.apos[0]++; 
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * Builds a tsvector from the words collected in prs: the words are first
+ * merged with uniqueWORD(), then each one is stored as a WordEntry plus its
+ * SHORTALIGNed bytes and, when it has positions, a uint16 counter followed
+ * by the WordEntryPos items (all with weight 0).
+ *
+ * The word strings, the per-word position arrays and prs->words itself are
+ * pfree'd.  Raises ERROR when a word offset does not fit the 20-bit pos
+ * field of WordEntry.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   /* first pass: size of the data area */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   /* second pass: fill entries and data */
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /* pos is a 20-bit field: the largest storable offset is MAXSTRPOS-1 */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+
+           ptr->haspos=1;
+           /* counter first, then the positions right behind it */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector: argument 0 is a configuration id (int4), argument 1 the text.
+ * The text is parsed with parsetext_v2() under that configuration and the
+ * collected words are turned into a tsvector; text yielding no words gives
+ * an empty tsvector (size 0).
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *txt = PG_GETARG_TEXT_P(1);
+   PRSTEXT     prs;
+   tsvector       *result;
+   TSCfgInfo *cfg=findcfg(PG_GETARG_INT32(0));
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+   PG_FREE_IF_COPY(txt, 1);
+
+   if (prs.curwords == 0) {
+       /* nothing indexable: hand back a header-only value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+       PG_RETURN_POINTER(result);
+   }
+
+   result = makevalue(&prs);
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector variant taking the configuration by name (text, argument 0):
+ * the name is mapped to an id with name2id_cfg() and the work is delegated
+ * to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname=PG_GETARG_TEXT_P(0);
+   Datum cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum vector;
+
+   vector = DirectFunctionCall3( to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0 );
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(vector);
+}
+
+/*
+ * to_tsvector variant using the configuration returned by get_currcfg();
+ * argument 0 is the text, the work is delegated to to_tsvector().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum cfgid = Int32GetDatum( get_currcfg() );
+   Datum vector;
+
+   vector = DirectFunctionCall3( to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0 );
+
+   PG_RETURN_DATUM(vector);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * TEXTOID.  Returns the oid of the first such candidate, or InvalidOid when
+ * there is none.  The whole candidate list is pfree'd.
+ */
+static Oid
+findFunc(char *fname) {
+   FuncCandidateList cand, next;
+   Oid found = InvalidOid;
+   List *qualname=makeList1(makeString(fname));
+
+   cand = FuncnameGetCandidates(qualname, 1);
+   freeList(qualname);
+
+   for ( ; cand != NULL; cand = next ) {
+       /* fetch the link before the node is released */
+       next = cand->next;
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * Row-level BEFORE INSERT/UPDATE trigger that fills a tsvector column.
+ * Trigger arguments: tgargs[0] names the tsvector column; each further
+ * argument names either a column of the relation (text, varchar or bpchar)
+ * whose value is parsed, or -- when no such column exists -- a one-argument
+ * function looked up with findFunc().  Once a function has been found its
+ * oid stays in effect, so the text of every column processed afterwards is
+ * passed through it before parsing.
+ *
+ * Returns the modified tuple produced by SPI_modifytuple().
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * If detoasting made no copy, point txt_toasted at the function
+            * result so that the pfree test below does not free it.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when it is a detoasted copy */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words collected: store an empty tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Descriptor of one word stored in a tsvector, packed into 32 bits:
+ * a flag telling whether position data follows the word, the word length,
+ * and the word's offset inside the data area.
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* limits derived from the bit-field widths above (len: 11 bits, pos: 20 bits) */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One word occurrence, packed into 16 bits: a 2-bit weight and a 14-bit
+ * position.
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+/* positions are clamped to MAXENTRYPOS-1, the largest value 14 bits can hold */
+#define MAXENTRYPOS    (1<<14)
+/* cap on the number of positions kept per word */
+#define MAXNUMPOS  256
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * On-disk/in-memory tsvector value: total length in bytes, number of
+ * WordEntry items, then the variable-length payload (WordEntry array
+ * followed by the data area -- see ARRPTR/STRPTR).
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Layout helpers for tsvector values.  All macro arguments are
+ * parenthesized so that expressions (e.g. "n + 1", "p + i") can be passed
+ * safely.
+ *
+ * DATAHDRSIZE       size of the len/size header
+ * CALCDATASIZE(x,l) total size of a value with x entries and l data bytes
+ * ARRPTR(x)         start of the WordEntry array
+ * STRPTR(x)         start of the data area following the WordEntry array
+ * STRSIZE(x)        size of the data area
+ * _POSDATAPTR(x,e)  address just past the SHORTALIGNed word of entry e,
+ *                   where its uint16 position counter is stored
+ * POSDATALEN(x,e)   number of positions of entry e (0 if it has none)
+ * POSDATAPTR(x,e)   first WordEntryPos of entry e
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Work item used while parsing tsvector input: the entry descriptor plus a
+ * separately allocated position array (slot 0 is used as a uint16 counter
+ * by the input code).
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/* Scanner state shared between gettoken_tsvector() and its callers */
+typedef struct
+{
+   char       *prsbuf;     /* current read position in the input string */
+   char       *word;       /* buffer receiving the current word */
+   char       *curpos;     /* write position inside word */
+   int4        len;        /* allocated size of word */
+   int4        state;      /* current scanner state */
+   int4        alen;       /* allocated length of pos; 0 when no positions were read */
+   WordEntryPos    *pos;   /* positions of the current word */
+   bool        oprisdelim; /* when true, operator characters end a word and position info is not parsed */
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>     /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip: return a copy of the input tsvector that contains only the words;
+ * all position information is dropped (haspos is cleared on every entry).
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* data area needs just the aligned word bytes */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight: return a copy of the input tsvector in which every position
+ * carries the weight selected by the second (char) argument:
+ * 'a' -> 3, 'b' -> 2, 'c' -> 1, 'd' -> 0, case-insensitively.
+ * Any other character raises ERROR.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   int letter = tolower(cw);
+   int weight = 0;
+   tsvector    *out;
+   WordEntry *entry;
+   int idx, k, npos;
+
+   if ( letter == 'a' )
+       weight = 3;
+   else if ( letter == 'b' )
+       weight = 2;
+   else if ( letter == 'c' )
+       weight = 1;
+   else if ( letter == 'd' )
+       weight = 0;
+   else
+       elog(ERROR,"Unknown weight");
+
+   /* work on a byte-for-byte copy of the input */
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+
+   entry = ARRPTR(out);
+   for (idx = 0; idx < out->size; idx++, entry++) {
+       WordEntryPos *p;
+
+       npos = POSDATALEN(out,entry);
+       if ( npos == 0 )
+           continue;
+       p = POSDATAPTR(out,entry);
+       for (k = 0; k < npos; k++)
+           p[k].weight = weight;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two words living in (possibly different) data areas: shorter
+ * words sort first, words of equal length are ordered by strncmp.
+ * ptra/ptrb are the data areas that a->pos / b->pos are offsets into.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len > b->len )
+                return 1;
+        if ( a->len < b->len )
+                return -1;
+
+        return strncmp( ptra + a->pos, ptrb + b->pos, a->len );
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position list of
+ * destptr (an entry of dest), shifting every appended position by maxpos
+ * and clamping it with LIMITPOS.  If destptr has no positions yet, its
+ * counter is initialised to 0 first.
+ *
+ * Copying stops when the source is exhausted, when the destination holds
+ * MAXNUMPOS positions, or when the last stored position already is
+ * MAXENTRYPOS-1 (nothing larger can be represented).
+ *
+ * Sets destptr->haspos when at least one position was added and returns
+ * the number of positions added.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos )
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ) ;i++) {
+       dpos[ *clen ].weight = spos[i].weight;
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1;
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat: merge two tsvectors into one.
+ *
+ * Both inputs are walked in compareEntry() order.  Words found in only one
+ * input are copied; words found in both are stored once with the position
+ * lists joined.  Positions taken from the second input are shifted by the
+ * largest position found in the first input (via add_pos()).
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* allocate for the case that the inputs share no word; size is corrected at the end */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* positions of in1 are copied unchanged, counter included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* positions of in2 are shifted by maxpos */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same word in both inputs: store it once, join the position lists */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the counter was already copied from in1, so only the added positions take space */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* remaining entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* remaining entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Fewer entries than reserved may have been written (shared words): fix
+    * size/len and move the data area down so that it again starts right
+    * behind the (now shorter) WordEntry array.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- WARNING: destructive uninstall script, executed as a single transaction.
+-- It drops all indices, triggers and columns that use the types defined
+-- in tsearch2.sql.
+
+
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the
+ * token-type number from deflex.h.  Token types start at 1, so slot 0
+ * is an empty placeholder.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token-type number
+ * from deflex.h, in the same order as lex_descr[].  Token types start
+ * at 1, so slot 0 is an empty placeholder.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Token-type numbers returned by the default word parser.
+ *
+ * Remember: LASTNUM has to equal the highest token-type number below,
+ * and lex_descr[] / tok_alias[] need one entry per number (plus slot 0).
+ */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* Description and alias tables, indexed by the numbers above */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+#include <stdio.h>     /* FILE, needed by the start_parse_fh() prototype */
+
+/*
+ * Text of the token most recently returned by tsearch2_yylex().
+ * Declared extern here: the one definition lives in parser.l, and a
+ * second (tentative) definition in every includer breaks linking with
+ * compilers that default to -fno-common.
+ */
+extern char *token;
+
+/*
+ * Length in bytes of that token.
+ * NOTE(review): still a definition rather than an extern declaration
+ * because no other definition of tokenlen is visible; once one is added
+ * to parser.l this line should become "extern int tokenlen;".
+ */
+int            tokenlen;
+
+/* Scan the next token; returns its type number (see deflex.h) */
+int            tsearch2_yylex(void);
+/* Begin scanning the first "limit" bytes of an in-memory string */
+void       start_parse_str(char *, int);
+/* Begin scanning a file handle; a limit of 0 means read without limit */
+void       start_parse_fh(FILE *, int);
+/* Release the scanner buffer and the saved compound token */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: every three-argument fprintf()
+ * in the generated scanner is turned into ts_error(ERROR, ...).
+ * NOTE(review): presumably ts_error(ERROR, ...) does not return, so the
+ * exit() that follows in flex's fatal-error routine is never reached -
+ * confirm.
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* Route the scanner's memory management through postgres allocation functions */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the current token */
+char *s     = NULL;  /* copy of a WHOLE compound token (hyphenated word, URL) */
+
+YY_BUFFER_STATE buf = NULL; /* scanner buffer; needed for parsing from a string */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next fread() request */
+
+/*
+ * Redefine the input macro so that reads can be limited in length.
+ * Interactive buffers are read character by character up to and
+ * including a newline.  Otherwise: lrlimit == 0 reports end of input,
+ * lrlimit > 0 caps the request at lrlimit bytes and decrements the
+ * remaining budget, lrlimit < 0 requests max_size bytes.  Despite its
+ * indentation, the fread() below runs for both the limited and the
+ * unlimited case.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* exclusive state: splitting a hyphenated word into its parts */
+%x DELIM  
+/* exclusive states: splitting a URL into host and path */
+%x URL  
+%x SERVER  
+
+/* exclusive states: inside an HTML tag, a quoted tag attribute, a comment, a script */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* "CYR" classes match any byte with the high bit set (\200-\377), e.g. cyrillic koi8 chars */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+   /*
+    * NOTE(review): this copy of the rules section looks damaged by
+    * HTML extraction.  Several patterns are empty ("") or cut off, and
+    * none of the rules below carries a <STATE> start-condition prefix
+    * although INTAG, QINTAG, INCOMMENT and INSCRIPT are declared
+    * exclusive.  Restore the patterns from the original parser.l
+    * before building.
+    */
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   /* leaves the current state and reports one blank as SPACE */
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   /* leaves the current state and reports one blank as SPACE */
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   /* end of tag: the tag is reported as a single blank of type TAG */
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   /* named HTML entity */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   /* numeric HTML entity with one to three digits */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   /* at least three dot-separated numbers, e.g. 7.4.1 */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   /* protocol prefix: switch to URL state */
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   /* ftp prefix is reported with the same HTTP token type */
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   /*
+    * Whole URL: return a private copy of it, then push the text back
+    * with yyless(0) so that it is scanned again in SERVER state.
+    * NOTE(review): the rules that follow probably carried <URL>/<SERVER>
+    * prefixes that were lost in extraction - confirm against the
+    * original file.
+    */
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   /*
+    * Return a private copy of the whole hyphenated word, then push the
+    * text back with yyless(0) so its parts are scanned in DELIM state.
+    */
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+   /* same as above for words made of [[:alpha:]] characters */
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   /* same as above for mixed alphanumeric parts */
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   /*
+    * NOTE(review): this pattern and several below repeat patterns that
+    * appear elsewhere in the rules; they probably carried a <DELIM>
+    * prefix that was lost in extraction - confirm against the original.
+    */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   /* the hyphen between two parts is reported as SPACE */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   /* anything else: go back to INITIAL and rescan the same character */
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   /* word made only of high-bit (\200-\377) bytes */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   /* mixed letters, digits and high-bit bytes */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   /* a run of whitespace is one SPACE token */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   /* any other single character is also reported as SPACE */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: release the saved copy of the last compound
+ * token (if any) and delete the current scanner buffer.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Start scanning the first "limit" bytes of the string "str".
+ * A parse that is still open is closed first.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL )
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Start scanning from the file handle "fh".  "limit" is the number of
+ * bytes that may be read; 0 selects unlimited reading (lrlimit = -1).
+ * A parse that is still open is closed first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL )
+       end_parse();
+
+   if ( limit != 0 )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser whose pg_ts_parser row
+ * has oid "id".
+ *
+ * *prs is zeroed first.  The row is fetched through SPI with a plan that
+ * is prepared once and kept in plan_getparser.  The start, nexttoken,
+ * end and headline functions are resolved with fmgr_info_cxt() in
+ * TopMemoryContext; the lextype function is stored as a plain Oid.
+ * Errors (plan preparation, execution, unknown id) are reported through
+ * ts_error(ERROR, ...).
+ *
+ * NOTE(review): the isnull flag returned by SPI_getbinval() is never
+ * checked; presumably the columns cannot be NULL - confirm.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       HeapTuple row = SPI_tuptable->vals[0];
+       TupleDesc rowdesc = SPI_tuptable->tupdesc;
+       Oid oid=InvalidOid;
+
+       /* columns 1..5 follow the order of the select list above */
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(row, rowdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(row, rowdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned: %u keeps large ids from printing as negative */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/* Per-backend cache of parser descriptions, see findprs() */
+typedef struct {
+   /* entry returned by the most recent lookup, or NULL */
+   WParserInfo *last_prs;
+   /* number of entries in use */
+   int     len;
+   /* number of entries allocated */
+   int     reallen;
+   /* entries, kept sorted by prs_id so that bsearch() can be used */
+   WParserInfo *list;
+   /* parser name -> oid map, filled by name2id_prs() */
+   SNMap       name2id_map;
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Throw away the whole parser cache: the name->oid map, the array of
+ * parser descriptions, and all bookkeeping fields.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL )
+       free(PList.list);
+
+   /* back to the "empty cache" state */
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly instead of subtracted: Oid is
+ * unsigned, so a difference truncated to int has the wrong sign once two
+ * ids are more than INT_MAX apart, which would corrupt the sort order
+ * and make lookups miss.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   return ( ida > idb ) ? 1 : 0;
+}
+
+/*
+ * Return the cached description of parser "id", loading it with
+ * init_prs() on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search of
+ * the sorted cache, then a fresh load appended to the (grown if needed)
+ * array.  The returned pointer is owned by the cache.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo *slot;
+
+   /* same parser as on the previous call? */
+   if ( PList.last_prs != NULL && PList.last_prs->prs_id == id )
+       return PList.last_prs;
+
+   /* already loaded? */
+   if ( PList.len != 0 ) {
+       WParserInfo probe;
+
+       probe.prs_id = id;
+       PList.last_prs = bsearch(&probe, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* not loaded yet: make room, doubling the array (16 entries at first) */
+   if ( PList.len == PList.reallen ) {
+       int newsize = ( PList.reallen ) ? 2*PList.reallen : 16;
+       WParserInfo *grown = (WParserInfo*)realloc(PList.list, sizeof(WParserInfo)*newsize);
+
+       if ( grown == NULL )
+           ts_error(ERROR,"No memory");
+       PList.reallen = newsize;
+       PList.list = grown;
+   }
+
+   slot = &(PList.list[PList.len]);
+   PList.last_prs = slot;
+   init_prs(id, slot);
+   PList.len++;
+
+   /* sorting may move the new entry, so look it up again */
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return findprs(id);
+}
+
+/* saved SPI plan for the name lookup below, prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * Names already seen are answered from PList.name2id_map; otherwise the
+ * table is queried through SPI and the result is added to the map.  An
+ * unknown name is reported through ts_error(ERROR, ...).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum argvals[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid id;
+
+   id = findSNMap_t( &(PList.name2id_map), name );
+   if ( id != InvalidOid )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, argvals, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed == 0 )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   SPI_finish();
+
+   /* remember the answer for later calls */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* Cross-call state of the token_type*() set-returning functions */
+typedef struct {
+   /* index of the next list entry to return */
+   int     cur;
+   /* array produced by the parser's lextype function; an entry with lexid 0 ends it */
+   LexDescr    *list;
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type*() functions: ask parser
+ * "prsid" for its list of token types and store it, together with the
+ * "tokentype" tuple descriptor, in the multi-call context of funcctx.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo    *parser = findprs(prsid);
+   MemoryContext   callerctx;
+   TupleDesc       desc;
+   TypeStorage    *state;
+
+   /* everything allocated below must survive until the last call */
+   callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   state->cur = 0;
+   state->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) )
+   );
+   funcctx->user_fctx = (void*)state;
+
+   desc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(callerctx);
+}
+
+/*
+ * Per-call worker of the token_type*() functions.
+ *
+ * Returns the next (id, alias, description) row as a Datum and frees
+ * the alias and description strings of that entry.  When the list is
+ * exhausted (or missing) the state is freed and (Datum)0 is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *state = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char        *fields[3];
+   char         idbuf[16];
+   HeapTuple    tuple;
+   Datum        result;
+
+   /* end of list: release the state and signal "done" */
+   if ( state->list == NULL || state->list[state->cur].lexid == 0 ) {
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   sprintf(idbuf,"%d",entry->lexid);
+   fields[0] = idbuf;
+   fields[1] = entry->alias;
+   fields[2] = entry->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   /* the strings were copied into the tuple */
+   pfree(entry->alias);
+   pfree(entry->descr);
+   state->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: one row per token type of the parser whose
+ * oid is argument 0.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call(funcctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: one row per token type of the parser whose
+ * name is argument 0.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call(funcctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: one row per token type of the current parser.
+ * If no parser has been selected yet, the one named "default" becomes
+ * the current parser.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid )
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call(funcctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+
+/*
+ * Make the parser whose oid is argument 0 the current parser.
+ * findprs() is called first so the parser is loaded into the cache
+ * (and an unknown oid is reported) before the setting changes.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid prsid = PG_GETARG_OID(0);
+
+   findprs(prsid);
+   current_parser_id = prsid;
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Make the parser whose name is argument 0 the current parser; the name
+ * is resolved with name2id_prs() and the work is left to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+   text *name = PG_GETARG_TEXT_P(0);
+   Datum prsid = ObjectIdGetDatum( name2id_prs(name) );
+
+   DirectFunctionCall1( set_curprs, prsid );
+
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* One token produced by a parser run */
+typedef struct {
+   /* token-type number returned by the parser */
+   int type;
+   /* NUL-terminated copy of the token text */
+   char    *lexem;
+} LexemEntry;
+
+/* Cross-call state of the parse*() set-returning functions */
+typedef struct {
+   /* index of the next entry to return */
+   int cur;
+   /* number of valid entries once parsing has finished */
+   int len;
+   /* the collected tokens */
+   LexemEntry  *list;
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse*() functions: run parser "prsid"
+ * over the whole of "txt", collect every token (type plus a private
+ * copy of its text) and store the list, together with the "tokenout"
+ * tuple descriptor, in the multi-call context of funcctx.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, int prsid, text *txt) {
+   WParserInfo    *parser = findprs(prsid);
+   MemoryContext   callerctx;
+   TupleDesc       desc;
+   PrsStorage     *state;
+   char           *lexeme = NULL;
+   int             lexlen = 0;
+   int             toktype = 0;
+
+   /* everything allocated below must survive until the last call */
+   callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = (PrsStorage*)palloc( sizeof(PrsStorage) );
+   state->cur = 0;
+   state->len = 16;
+   state->list = (LexemEntry*)palloc( sizeof(LexemEntry)*state->len );
+
+   /* start the parser on the text payload (header excluded) */
+   parser->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(parser->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* a token type of 0 marks the end of input */
+   for (;;) {
+       LexemEntry *entry;
+
+       toktype = DatumGetInt32(FunctionCall3(
+           &(parser->getlexeme_info),
+           PointerGetDatum(parser->prs),
+           PointerGetDatum(&lexeme),
+           PointerGetDatum(&lexlen)));
+       if ( toktype == 0 )
+           break;
+
+       /* double the array when it is full */
+       if ( state->cur >= state->len ) {
+           state->len = 2*state->len;
+           state->list = (LexemEntry*)repalloc(state->list, sizeof(LexemEntry)*state->len);
+       }
+
+       entry = &(state->list[state->cur]);
+       entry->lexem = palloc(lexlen+1);
+       memcpy( entry->lexem, lexeme, lexlen);
+       entry->lexem[lexlen] = '\0';
+       entry->type = toktype;
+       state->cur++;
+   }
+
+   FunctionCall1(
+       &(parser->end_info),
+       PointerGetDatum(parser->prs)
+   );
+
+   /* from now on len is the token count and cur the read position */
+   state->len = state->cur;
+   state->cur = 0;
+
+   funcctx->user_fctx = (void*)state;
+   desc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(callerctx);
+}
+
+/*
+ * Per-call worker of the parse*() functions.
+ *
+ * Returns the next (type, text) row as a Datum and frees the text of
+ * that token.  When all tokens have been returned the state is freed
+ * and (Datum)0 is returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *state = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char        *fields[2];
+   char         typebuf[16];
+   HeapTuple    tuple;
+   Datum        result;
+
+   /* nothing left: release the state and signal "done" */
+   if ( state->cur >= state->len ) {
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+
+   sprintf(typebuf,"%d",entry->type);
+   fields[0] = typebuf;
+   fields[1] = entry->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   /* the text was copied into the tuple */
+   pfree(entry->lexem);
+   state->cur++;
+   return result;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokenize the text in argument 1 with the
+ * parser whose oid is argument 0 and return one row per token.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(funcctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokenize the text in argument 1 with the
+ * parser whose name is argument 0 and return one row per token.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+       text *txt = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(funcctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokenize the text in argument 0 with the
+ * current parser and return one row per token.  If no parser has been
+ * selected yet, the one named "default" becomes the current parser.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid )
+           current_parser_id = name2id_prs( char2text("default") );
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(funcctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+
+/*
+ * Build a headline for a document.
+ *
+ * Arguments: 0 - oid of the configuration, 1 - document text,
+ * 2 - query, 3 - optional options text (may be absent or a NULL pointer).
+ * The document is parsed with hlparsetext(), the headline function of
+ * the configuration's parser marks the words, and genhl() builds the
+ * result text.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo   *cfg = findcfg(PG_GETARG_OID(0));
+   text        *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE   *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text        *opt = NULL;
+   WParserInfo *prsobj;
+   HLPRSTEXT    prs;
+   text        *out;
+
+   if ( PG_NARGS()>3 && PG_GETARG_POINTER(3) )
+       opt = PG_GETARG_TEXT_P(3);
+   prsobj = findprs(cfg->prs_id);
+
+   /* start with an empty word list of 32 slots */
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   /* release detoasted copies and the working buffers */
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt )
+       PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+
+/*
+ * headline() for a configuration given by name (argument 0).  The
+ * remaining arguments are passed through; a missing options argument
+ * is passed as a NULL pointer.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg = PG_GETARG_TEXT_P(0);
+   Datum out;
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       ( PG_NARGS() <= 3 ) ? PointerGetDatum(NULL) : PG_GETARG_DATUM(3)
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+
+/*
+ * headline() for the current configuration (get_currcfg()).  Arguments:
+ * 0 - document text, 1 - query, 2 - optional options text; a missing
+ * options argument is passed as a NULL pointer.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum out;
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       ( PG_NARGS() <= 2 ) ? PointerGetDatum(NULL) : PG_GETARG_DATUM(2)
+   );
+
+   PG_RETURN_DATUM(out);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* Cached description of one row of pg_ts_parser, filled by init_prs() */
+typedef struct {
+   /* oid of the pg_ts_parser row */
+   Oid prs_id;
+   /* resolved prs_start function */
+   FmgrInfo start_info;
+   /* resolved prs_nexttoken function */
+   FmgrInfo getlexeme_info;
+   /* resolved prs_end function */
+   FmgrInfo end_info;
+   /* resolved prs_headline function */
+   FmgrInfo headline_info;
+   /* oid of the prs_lextype function (called by oid, not resolved) */
+   Oid lextype;
+   /* parser state pointer returned by the start function */
+   void *prs;
+} WParserInfo;
+
+/* Load the parser with the given oid into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* Return the cached description of a parser, loading it if needed */
+WParserInfo* findprs(Oid id);
+/* Translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* Drop the whole parser cache */
+void   reset_prs(void);
+
+
+/* One token type as reported by a parser's lextype function; lexid 0 ends a list */
+typedef struct {
+   int lexid;
+   char    *alias;
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * Build a palloc'ed LexDescr array describing lexeme types 1..LASTNUM.
+ * Aliases come from tok_alias[] and descriptions from lex_descr[]; both
+ * are copied with pstrdup().  One extra trailing entry with lexid 0
+ * terminates the array.
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *list;
+   LexDescr *slot;
+   int lexid;
+
+   list = (LexDescr*) palloc( sizeof(LexDescr)*(LASTNUM+1) );
+
+   for(lexid=1, slot=list; lexid<=LASTNUM; lexid++, slot++) {
+       slot->lexid = lexid;
+       slot->alias = pstrdup(tok_alias[lexid]);
+       slot->descr = pstrdup(lex_descr[lexid]);
+   }
+
+   /* terminator: only lexid is assigned on the last entry */
+   list[LASTNUM].lexid = 0;
+
+   PG_RETURN_POINTER(list);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+
+/*
+ * Start parsing: hands the buffer (argument 0) and its length
+ * (argument 1) to start_parse_str().  Always returns a NULL pointer.
+ */
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char    *buf = (char*) PG_GETARG_POINTER(0);
+   int     buflen = PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+
+/*
+ * Fetch the next lexeme by calling tsearch2_yylex().  The current values
+ * of the variables token and tokenlen are stored through the pointers
+ * given as arguments 1 and 2, and the value returned by the lexer is
+ * returned as an int32.  Argument 0 is not used.
+ */
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char    **tokout = (char**) PG_GETARG_POINTER(1);
+   int     *lenout = (int*) PG_GETARG_POINTER(2);
+   int     lextype;
+
+   lextype = tsearch2_yylex();
+
+   *tokout = token;
+   *lenout = tokenlen;
+
+   PG_RETURN_INT32(lextype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+
+/*
+ * Finish parsing by calling end_parse().  Argument 0 is not used.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   end_parse();
+
+   PG_RETURN_VOID();
+}
+
+#define LEAVETOKEN(x)  ( (x)==12 ) /* numeric token-type ids used below are presumably those of wordparser/deflex.h -- TODO confirm each */
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 ) /* same test as LEAVETOKEN */
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 ) /* prsd_headline sets replace=1 on words of these types */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) ) /* types not counted as words when prsd_headline measures a fragment */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) ) /* types prsd_headline avoids ending a fragment on */
+
+typedef struct { /* window of headline words passed as checkval to checkcondition_HL via TS_execute */
+   HLWORD  *words; /* first word of the window */
+   int len; /* number of words in the window */
+} hlCheck;
+
+/*
+ * TS_execute callback: reports whether the query item val is referenced
+ * by any word of the hlCheck window passed in checkval.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window = (hlCheck*) checkval;
+   int idx = 0;
+
+   while( idx < window->len ) {
+       if ( window->words[idx].item==val )
+           return true;
+       idx++;
+   }
+
+   return false;
+}
+
+
+/*
+ * Find the next cover of the query in prs->words, searching from word
+ * index *p.
+ *
+ * On entry *p is the index to start from.  On success the function returns
+ * true with *p and *q set to the first and last word index of a window for
+ * which TS_execute() accepts the query.  If the window found does not
+ * satisfy the query, the search is repeated starting one word later.
+ * Returns false when no further cover exists.
+ *
+ * Note that *q==0 doubles as the "nothing found" marker.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* right edge: the largest "first occurrence at or after pos" among the query values */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q==0 )
+       return false;
+
+   /* left edge: the smallest "last occurrence within pos..*q" among the query values */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* window does not satisfy the query: retry one word further */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+
+/*
+ * Headline function of the default parser.
+ *
+ * Arguments: 0 - HLPRSTEXT holding the parsed words, 1 - text with options
+ * (may be NULL), 2 - the query.  Recognized option keys (case-insensitive):
+ * MaxWords, MinWords, ShortWord, StartSel, StopSel.
+ *
+ * Every cover reported by hlCover() is examined and the best fragment is
+ * remembered; if there is no cover at all, the fragment starts at word 0.
+ * The words of the chosen fragment are flagged (in, selected, skip,
+ * replace) and startsel/stopsel with their lengths are filled in.
+ * Returns the HLPRSTEXT pointer it was given.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults, may be overridden from opt */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* a better fragment with a good end is already known, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Accept this fragment if it is the first one, or it holds more query
+        * words and ends well, or it ends well while the current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover found: take words from the start until min_words are counted */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen fragment */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   if (!prs->startsel)
+       prs->startsel=pstrdup("");
+
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+
+




This is the main PostgreSQL git repository.
RSS
Atom
+           if ( ptr->item == item ) {
+               if ( ptr->pos > *q ) {
+                   *q = ptr->pos;
+                   lastpos= ptr - doc;
+               } 
+               break;
+           } 
+           ptr++;
+       }
+
+       item++;
+   }
+
+   if (*q==0 )
+       return false;
+
+   if (*q==oldq) { /* already check this pos */
+       (*pos)++;
+       return Cover(doc, len, query, pos,p,q);
+   } 
+
+   item=GETQUERY(query);
+   for(i=0; isize; i++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       ptr = doc + lastpos;
+
+       while(ptr>=doc+*pos) {
+           if ( ptr->item == item ) {
+               if ( ptr->pos < *p ) {
+                   *p = ptr->pos;
+                   f=ptr;
+               }
+               break;
+           }
+           ptr--;
+       }
+       item++;
+   }
+ 
+   if ( *p<=*q ) {
+       ChkDocR ch = { f, (doc + lastpos)-f+1 };
+       *pos = f-doc+1;
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { 
+ /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ 
+           return true;
+       } else
+           return Cover(doc, len, query, pos,p,q); 
+   }
+ 
+   return false;
+}
+
+/*
+ * Build the position-ordered document representation used by Cover().
+ *
+ * For every VAL item of the query that find_wordentry() locates in txt,
+ * one DocRepresentation element is emitted per stored position; entries
+ * without position data contribute the positions held in POSNULL.
+ * The element count is stored in *doclen.  Returns a palloc'ed array
+ * sorted with compareDocR, or NULL (with *doclen == 0) when no query
+ * value occurs in the document.
+ */
+static DocRepresentation*
+get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
+   ITEM    *item=GETQUERY(query);
+   WordEntry *entry;
+   WordEntryPos    *post;
+   int4    dimt,j,i;
+   int len=query->size*4,cur=0;
+   DocRepresentation *doc;
+
+   /* the first uint16 of POSNULL is used as its element count */
+   *(uint16*)POSNULL = lengthof(POSNULL)-1;
+   doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+   for(i=0; i<query->size; i++) {
+       if ( item[i].type != VAL )
+           continue;
+
+       entry=find_wordentry(txt,query,&(item[i]));
+       if ( !entry )
+           continue;
+
+       if ( entry->haspos ) {
+           dimt = POSDATALEN(txt,entry);
+           post = POSDATAPTR(txt,entry);
+       } else {
+           dimt = *(uint16*)POSNULL;
+           post = POSNULL+1;
+       }
+
+       /* grow the array until the new positions fit */
+       while( cur+dimt >= len ) {
+           len*=2;
+           doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+       }
+
+       for(j=0;j<dimt;j++) {
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+   
+   if ( cur>0 ) {
+       if ( cur>1 ) 
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+   
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * Cover-density ranking.
+ *
+ * Arguments: 0 - K (values <= 0 select the default of 4), 1 - tsvector,
+ * 2 - query, 3 - optional normalization method (DEF_NORM_METHOD when
+ * absent).  Each cover [p,q] reported by Cover() adds 1.0 when its span
+ * is not longer than K and K/span otherwise.
+ *
+ * Normalization: 0 - none, 1 - divide by log(length), 2 - divide by
+ * length, where length is cnt_length(txt).  The division is skipped when
+ * the divisor would not be positive (length <= 1 for method 1, length <= 0
+ * for method 2) so that the result never becomes inf or NaN.
+ * Any other method value raises an error.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   float   doclen;
+   int p=0,q=0,len,cur;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;    
+   while( Cover(doc, len, query, &cur, &p, &q) ) 
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   switch(method) {
+       case 0: break;
+       case 1:
+           doclen = (float)cnt_length(txt);
+           /* log(1) is 0 and log(0) is -inf: leave res unnormalized then */
+           if ( doclen > 1.0 )
+               res /= log(doclen);
+           break;
+       case 2:
+           doclen = (float)cnt_length(txt);
+           if ( doclen > 0.0 )
+               res /= doclen;
+           break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd() with K passed as -1.  Arguments 0 and 1 are forwarded
+ * unchanged; the normalization method is argument 2 when the function is
+ * called with three arguments and DEF_NORM_METHOD otherwise.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum   method;
+   Datum   result;
+
+   if ( PG_NARGS() == 3 )
+       method = PG_GETARG_DATUM(2);
+   else
+       method = Int32GetDatum(DEF_NORM_METHOD);
+
+   result = DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   );
+
+   PG_RETURN_DATUM(result);
+}
+
+/**************debug*************/
+
+typedef struct { /* one word occurrence of the document, used by the debug function get_covers */
+   char    *w; /* points at the word inside the tsvector string area (not NUL-terminated copy) */
+   int2    len; /* length of the word */
+   int2    pos; /* position of this occurrence */
+   int2    start; /* number of the cover starting at this word, 0 if none */
+   int2    finish; /* number of the cover ending at this word, 0 if none */
+} DocWord;
+
+/*
+ * qsort comparator ordering DocWord elements by ascending pos.
+ * Returns 0 for equal positions, as the qsort contract requires
+ * (a comparator must be consistent: cmp(a,b) and cmp(b,a) may not both
+ * be positive).
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   int2 posa = ((const DocWord *) a)->pos;
+   int2 posb = ((const DocWord *) b)->pos;
+
+   if ( posa == posb )
+       return 0;
+   return ( posa > posb ) ? 1 : -1;
+}
+
+
+/*
+ * Debug function: returns the words of the document (argument 0) in
+ * position order, with the covers of the query (argument 1) marked as
+ * "{N " before the first word and "}N " after the last word of cover N.
+ * Returns an empty text when no query value occurs in the document.
+ * Raises an error if any word of the tsvector lacks position data.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word occurrences */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per occurrence; len accumulates the space for "word " */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       /* bounds are tested before dwptr is dereferenced */
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+typedef struct NODE /* node of the pointer-based query tree built by maketree() */
+{
+   struct NODE *left; /* left operand; stays NULL for values and for the unary '!' operator */
+   struct NODE *right; /* right operand; NULL for values */
+   ITEM       *valnode; /* the ITEM of the flat query this node stands for (not owned by the node) */
+}  NODE;
+
+/*
+ * Convert the flat (array) form of a query, starting at item "in", into a
+ * tree of palloc'ed NODEs.  For an operator the right operand is the item
+ * that immediately follows it; the left operand, which exists for every
+ * operator except '!', is found in->left items further on.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *result;
+
+   result = (NODE *) palloc(sizeof(NODE));
+   result->left = NULL;
+   result->right = NULL;
+   result->valnode = in;
+
+   if (in->type != OPR)
+       return result;
+
+   result->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       result->left = maketree(in + in->left);
+
+   return result;
+}
+
+typedef struct /* output buffer used while flattening a NODE tree (see plainnode) */
+{
+   ITEM       *ptr; /* palloc'ed array of items, grown with repalloc */
+   int4        len; /* allocated number of items */
+   int4        cur; /* number of items written so far */
+}  PLAINTREE;
+
+static void
+plainnode(PLAINTREE * state, NODE * node) /* append the subtree rooted at node to state->ptr in flat form; every visited NODE is pfree'd */
+{
+   if (state->cur == state->len) /* buffer full: double it */
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+   memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM));
+   if (node->valnode->type == VAL)
+       state->cur++;
+   else if (node->valnode->val == (int4) '!')
+   {
+       state->ptr[state->cur].left = 1; /* the single operand directly follows the operator */
+       state->cur++;
+       plainnode(state, node->right);
+   }
+   else
+   {
+       int4        cur = state->cur; /* index of this operator; an index is kept because ptr may move on repalloc */
+
+       state->cur++;
+       plainnode(state, node->right);
+       state->ptr[cur].left = state->cur - cur; /* offset from the operator to its left operand, written next */
+       plainnode(state, node->left);
+   }
+   pfree(node);
+}
+
+/*
+ * Flatten a NODE tree back into the array form of a query.
+ * The number of items produced is stored in *len.  Returns a palloc'ed
+ * ITEM array, or NULL (with *len == 0) when root is NULL or its item is
+ * neither a value nor an operator.  The tree is consumed by plainnode().
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   state;
+
+   state.ptr = NULL;
+   state.cur = 0;
+   state.len = 16;
+
+   if (root != NULL &&
+       (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       state.ptr = (ITEM *) palloc(state.len * sizeof(ITEM));
+       plainnode(&state, root);
+   }
+
+   *len = state.cur;
+   return state.ptr;
+}
+
+/*
+ * Release a NODE tree: both subtrees first, then the node itself.
+ * A NULL argument is accepted and ignored.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Remove every '!' subtree from the tree.
+ * A '!' node is treated as always true, so: the node itself disappears,
+ * an '|' with a removed operand disappears as a whole, and an '&' is
+ * replaced by its remaining operand (or disappears when both are removed).
+ * Returns the cleaned tree, or NULL when nothing is left.  Removed nodes
+ * are freed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   NODE       *survivor;
+
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   if (node->valnode->val == (int4) '|')
+   {
+       /* the right side is only cleaned when the left side survived */
+       node->left = clean_NOT_intree(node->left);
+       if (node->left != NULL)
+       {
+           node->right = clean_NOT_intree(node->right);
+           if (node->right != NULL)
+               return node;
+       }
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & */
+   node->left = clean_NOT_intree(node->left);
+   node->right = clean_NOT_intree(node->right);
+   if (node->left != NULL && node->right != NULL)
+       return node;
+
+   /* at most one operand is left; it takes the place of the operator */
+   survivor = (node->left != NULL) ? node->left : node->right;
+   pfree(node);
+   return survivor;
+}
+
+/*
+ * Return a flat copy of the query at ptr with all '!' subtrees removed
+ * (see clean_NOT_intree); the item count is stored in *len.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree = maketree(ptr);
+   NODE       *cleaned = clean_NOT_intree(tree);
+
+   return plaintree(cleaned, len);
+}
+
+#define V_UNKNOWN  0 /* subtree still depends on real values */
+#define V_TRUE     1 /* subtree folded to constant true */
+#define V_FALSE        2 /* subtree folded to constant false */
+
+/*
+ * Remove from the query tree the values which are always present in the
+ * text (stopwords, item type VALTRUE); *result is set to V_TRUE or V_FALSE when the subtree folds to a constant and NULL is returned
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL) /* real value: kept, *result is left untouched */
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE; /* negation of the folded operand */
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE) /* true | x is true */
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE) /* false | x is x */
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else /* any other operator is handled as '&' */
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE) /* false & x is false */
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE) /* true & x is x */
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node; /* reached only for a '!' node whose operand was kept */
+}
+
+/*
+ * Return a flat copy of the query at ptr with the always-true values
+ * folded away (see clean_fakeval_intree); the item count is stored in
+ * *len.  If the whole query folds to a constant, a NOTICE is emitted,
+ * *len is set to 0 and NULL is returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree = maketree(ptr);
+   char        verdict = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(tree, &verdict);
+
+   if (verdict == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len); /* flat copy of the query without '!' subtrees; item count goes to *len */
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len); /* flat copy with always-true values folded away; NULL and *len == 0 if nothing remains */
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/*
+ * qsort/bsearch comparator: orders SNMapEntry elements by key (strcmp).
+ */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   SNMapEntry *lhs = (SNMapEntry*) a;
+   SNMapEntry *rhs = (SNMapEntry*) b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add the pair (key, value) to the map.  The key is copied with strdup().
+ * The entry array starts at 16 slots and is doubled when full; after the
+ * insertion the array is re-sorted by key so that findSNMap() can use
+ * bsearch().  Raises an error when memory cannot be obtained.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if ( map->len >= map->reallen ) {
+       SNMapEntry *grown;
+       int newlen = 16;
+
+       if ( map->reallen )
+           newlen = 2*map->reallen;
+
+       grown = (SNMapEntry*) realloc(map->list, sizeof(SNMapEntry) * newlen);
+       if ( grown == NULL )
+           elog(ERROR, "No memory");
+
+       map->reallen = newlen;
+       map->list = grown;
+   }
+
+   slot = &(map->list[ map->len ]);
+   slot->key = strdup(key);
+   if ( slot->key == NULL )
+       elog(ERROR, "No memory");
+   slot->value = value;
+
+   map->len++;
+   if ( map->len > 1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Same as addSNMap(), with the key given as a text value; it is converted
+ * with text2char() and the temporary string is freed afterwards.
+ */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey;
+
+   ckey = text2char( key );
+   addSNMap(map, ckey, value);
+   pfree(ckey);
+}
+
+/*
+ * Look up key in the map with bsearch().  Returns the stored Oid, or 0
+ * when the map is empty or the key is not present.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe = {key, 0};
+   SNMapEntry *hit;
+
+   if ( map->len==0 || !map->list )
+       return 0;   
+
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( hit == NULL )
+       return 0;
+
+   return hit->value;
+}
+
+/*
+ * Same as findSNMap(), with the key given as a text value; it is converted
+ * with text2char() and the temporary string is freed before returning.
+ * The result is kept in an Oid so that it is never squeezed through a
+ * signed int on its way back to the caller.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Free all keys and the entry array of the map, then zero the SNMap
+ * structure itself so that it can be reused.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *cur;
+
+       for( cur=map->list; map->len; cur++, map->len-- ) {
+           if ( cur->key )
+               free(cur->key);
+       }
+       free( map->list );
+   }
+
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+typedef struct { /* one name -> Oid association */
+   char    *key; /* strdup'ed copy of the name, freed by freeSNMap() */
+   Oid value; /* Oid stored for the key */
+} SNMapEntry;
+
+typedef struct { /* growable array of entries kept sorted by key (addSNMap re-sorts, findSNMap uses bsearch) */
+   int len; /* number of entries in use */
+   int reallen; /* number of entries allocated in list */
+   SNMapEntry  *list; /* entry array obtained with realloc(); NULL in a zeroed map */
+} SNMap;
+
+void addSNMap( SNMap *map, char *key, Oid value ); /* insert a pair; the key is copied */
+void addSNMap_t( SNMap *map, text *key, Oid value ); /* same, key given as text */
+Oid findSNMap( SNMap *map, char *key ); /* returns the stored Oid, or 0 if not found */
+Oid findSNMap_t( SNMap *map, text *key ); /* same, key given as text */
+void freeSNMap( SNMap *map ); /* free keys and array, zero the structure */
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/* Allocate a stemmer environment with S_size string slots, I_size
+   integers and B_size symbols (each part is skipped when its size is 0).
+   Returns NULL if one of the calloc() calls fails; whatever had been
+   built up to that point is released with SN_close_env(), which is safe
+   because each *_size field is only set after its array was obtained.
+   The results of create_s() are not checked here. */
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    SN_close_env(z);
+    return NULL;
+}
+
+/* Release an environment made by SN_create_env: the S strings and their
+   array, the I and B arrays, the working string p, and z itself. */
+
+extern void SN_close_env(struct SN_env * z)
+{
+    int i;
+
+    if (z->S_size != 0)
+    {
+        for (i = 0; i < z->S_size; i++)
+            lose_s(z->S[i]);
+        free(z->S);
+    }
+
+    if (z->I_size != 0)
+        free(z->I);
+
+    if (z->B_size != 0)
+        free(z->B);
+
+    if (z->p != NULL)
+        lose_s(z->p);
+
+    free(z);
+}
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s) /* make the size symbols at s the current string of z */
+{
+    replace_s(z, 0, z->l, size, s); /* replaces the range 0..z->l with s -- presumably the whole current string, confirm against replace_s */
+    z->c = 0; /* reset c to the start */
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+struct SN_env { /* stemmer environment; created by SN_create_env, released by SN_close_env */
+    symbol * p; /* working string, obtained from create_s() */
+    int c; int a; int l; int lb; int bra; int ket; /* SN_set_current replaces the range 0..l of p and resets c to 0; meaning of the other fields is not visible here */
+    int S_size; int I_size; int B_size; /* element counts of S, I and B (0 when the array is absent) */
+    symbol * * S; /* S_size strings, each obtained from create_s() */
+    int * I; /* I_size integers, zero-initialized by calloc */
+    symbol * B; /* B_size symbols, zero-initialized by calloc */
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' }; /* "gener" */
+
+static struct among a_0[1] = /* searched forwards with find_among() in r_mark_regions(); fields follow struct among: { s_size, s, substring_i, result, function } */
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+static symbol s_1_0[3] = { 'i', 'e', 'd' }; /* suffix strings for a_1 */
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] = /* searched backwards with find_among_b() in r_Step_1a(); the 4th field is the value r_Step_1a() switches on (-1 for "ss"/"us" matches no case) */
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+static symbol s_2_1[2] = { 'b', 'b' }; /* ending strings for a_2; there is no s_2_0 because entry 0 is the empty string */
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] = /* second lookup of r_Step_1b(), done after the suffix was deleted: result 1 = "at"/"bl"/"iz", 2 = doubled letter, 3 = the empty entry */
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+static symbol s_3_0[2] = { 'e', 'd' }; /* suffix strings for a_3 */
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] = /* first lookup of r_Step_1b(): result 1 = "eed"/"eedly", result 2 = "ed"/"ing"/"edly"/"ingly" */
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' }; /* suffix strings for a_4 */
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] = /* searched backwards in r_Step_2(); the 4th field (1..16) picks the switch case there */
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' }; /* suffix strings for a_5 */
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] = /* searched backwards in r_Step_3(); the 4th field (1..6) picks the switch case there */
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+static symbol s_6_0[2] = { 'i', 'c' }; /* suffix strings for a_6 */
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] = /* searched backwards in r_Step_4(); result 2 is reserved for "ion", every other suffix yields 1 */
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+static symbol s_7_0[1] = { 'e' }; /* final letters examined by r_Step_5() */
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] = /* searched backwards in r_Step_5(): result 1 = 'e', result 2 = 'l' */
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' }; /* words listed in a_8 */
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] = /* searched backwards in r_exception2(), which additionally requires the match to reach the backward limit; all results are -1 */
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' }; /* words listed in a_9 */
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] = /* searched forwards in r_exception1(); results 1..11 pick a replacement there, -1 entries match no case so the word is kept as it is */
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+static unsigned char g_v[] = { 17, 65, 16, 1 }; /* passed to the grouping tests with range 97..121; read as a bit set starting at 97 it selects a e i o u y - presumably how in_grouping() interprets it, TODO confirm in the runtime */
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; /* used by r_shortv() with range 89..121 */
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 }; /* used by r_Step_2() case 16 with range 99..116 */
+
+static symbol s_0[] = { 'y' }; /* s_0 .. s_48: literal operands of the eq_s(), eq_s_b(), slice_from_s() and insert_s() calls below; the length is passed separately at each call site */
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+static int r_prelude(struct SN_env * z) { /* Prelude: rewrites selected 'y'
+     * symbols to 'Y' and records in z->B[0] (Y_found) whether any rewrite
+     * took place. Two passes, each restoring the cursor when done: first a
+     * 'y' at the entry cursor for which the following in_grouping(g_v) test
+     * succeeds, then, repeatedly, the next 'y' located right after a
+     * successful in_grouping(g_v) test. Always returns 1. */
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c; /* position to return to if this round finds nothing */
+            while(1) { /* goto, line 26 */
+                int c = z->c; /* candidate position being tried */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3: /* no match here: move one symbol on, or give up at the limit */
+                z->c = c;
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1: /* emitted by the generator; no goto in this function targets it */
+        z->c = c;
+    }
+    return 1;
+}
+
+static int r_mark_regions(struct SN_env * z) { /* Computes the two region
+     * marks p1 (z->I[0]) and p2 (z->I[1]). Both start at z->l. p1 becomes
+     * the cursor position reached either by matching an a_0 entry or by
+     * going past an in_grouping(g_v) hit and then an out_grouping(g_v)
+     * hit; p2 is obtained by repeating that pair of scans from p1. If a
+     * scan reaches z->l first, the marks not yet assigned keep the value
+     * z->l. The cursor is restored on exit and the result is always 1. */
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+static int r_shortv(struct SN_env * z) { /* Backward test used by
+     * r_Step_1b() and r_Step_5(). Returns 1 when, reading backwards from
+     * the cursor, either out_grouping_b(g_v_WXY), in_grouping_b(g_v) and
+     * out_grouping_b(g_v) all succeed in that order, or (retried from the
+     * original cursor) out_grouping_b(g_v) and in_grouping_b(g_v) succeed
+     * and the cursor then sits at the backward limit. Returns 0 otherwise. */
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+static int r_R1(struct SN_env * z) { /* R1 test: 1 when the cursor is at or
+     * beyond the p1 mark kept in z->I[0], otherwise 0. */
+    const int p1_mark = z->I[0];
+    return (z->c >= p1_mark) ? 1 : 0;
+}
+
+static int r_R2(struct SN_env * z) { /* R2 test: 1 when the cursor is at or
+     * beyond the p2 mark kept in z->I[1], otherwise 0. */
+    const int p2_mark = z->I[1];
+    return (z->c >= p2_mark) ? 1 : 0;
+}
+
+static int r_Step_1a(struct SN_env * z) { /* Step 1a: looks up the word
+     * ending in a_1 (backwards) and rewrites the marked slice according to
+     * the entry's result. Returns 0 when nothing in a_1 matches or a case's
+     * precondition fails, 1 otherwise - including the "ss"/"us" entries,
+     * whose result -1 selects no case and leaves the word unchanged. */
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1: /* "sses" -> "ss" */
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2: /* "ied"/"ies" -> "ie" when exactly one symbol precedes the suffix, else "i" */
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3: /* "s": skip one symbol, then require an in_grouping_b(g_v) hit further back before deleting */
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_1b(struct SN_env * z) { /* Step 1b: handles the a_3
+     * suffixes. Result 1 ("eed", "eedly") is replaced by "ee" if r_R1()
+     * holds. Result 2 ("ed", "ing", "edly", "ingly") is deleted provided an
+     * in_grouping_b(g_v) hit exists earlier in the word; the new ending is
+     * then looked up in a_2 and patched up (append 'e', drop one symbol, or
+     * append 'e' for a short word). Returns 0 as soon as a lookup or a
+     * precondition fails, 1 otherwise. */
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test; /* it was only a test: put the cursor back */
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1: /* ending "at", "bl" or "iz": append 'e' */
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2: /* doubled letter: delete the last symbol */
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3: /* empty entry: append 'e' only at the p1 mark and when r_shortv() holds */
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_1c(struct SN_env * z) { /* Step 1c: replaces a final 'y'
+     * or 'Y' by 'i' when out_grouping_b(g_v) succeeds for what precedes it
+     * and the cursor has not thereby reached the backward limit. Returns 1
+     * after the replacement, 0 if any of the conditions fails. */
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0; /* at the limit: the negated test fails */
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+static int r_Step_2(struct SN_env * z) { /* Step 2: looks up the ending in
+     * a_4 (backwards) and, if the match starts at or after p1 (r_R1), swaps
+     * it for the shorter form chosen by the entry's result. Returns 0 when
+     * there is no match, R1 fails, or a per-case precondition fails;
+     * returns 1 otherwise. */
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1: /* "tional" -> "tion" */
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2: /* "enci" -> "ence" */
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3: /* "anci" -> "ance" */
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4: /* "abli" -> "able" */
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5: /* "entli" -> "ent" */
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6: /* "izer", "ization" -> "ize" */
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7: /* "ational", "ation", "ator" -> "ate" */
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8: /* "alism", "aliti", "alli" -> "al" */
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9: /* "fulness" -> "ful" */
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10: /* "ousli", "ousness" -> "ous" */
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11: /* "iveness", "iviti" -> "ive" */
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12: /* "biliti", "bli" -> "ble" */
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13: /* "ogi" -> "og", only when preceded by 'l' */
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14: /* "fulli" -> "ful" */
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15: /* "lessli" -> "less" */
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16: /* "li" is deleted when in_grouping_b(g_valid_LI) accepts what precedes it */
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_3(struct SN_env * z) { /* Step 3: looks up the ending in
+     * a_5 (backwards) and, if the match starts at or after p1 (r_R1),
+     * replaces or deletes it as selected by the entry's result. Returns 0
+     * when there is no match or a region test fails, 1 otherwise. */
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1: /* "tional" -> "tion" */
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2: /* "ational" -> "ate" */
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3: /* "alize" -> "al" */
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4: /* "icate", "iciti", "ical" -> "ic" */
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5: /* "ful", "ness": deleted */
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6: /* "ative": deleted only when also inside R2 */
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_4(struct SN_env * z) { /* Step 4: deletes an a_6 suffix
+     * whose start lies at or after p2 (r_R2). "ion" (result 2) is deleted
+     * only when the symbol before it is 's' or 't'. Returns 0 when there is
+     * no match or a condition fails, 1 otherwise. */
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+static int r_Step_5(struct SN_env * z) { /* Step 5: removes a final 'e' or
+     * 'l' (a_7). The 'e' goes when r_R2() holds, or when r_R1() holds and
+     * r_shortv() does not. The 'l' goes when r_R2() holds and the symbol
+     * before it is also 'l'. Returns 0 when nothing matches or the
+     * conditions fail, 1 after a deletion. */
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0; /* shortv succeeded, so the negated test fails */
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+static int r_exception2(struct SN_env * z) { /* Returns 1 when an a_8 entry
+     * matches backwards from the cursor and the match extends all the way
+     * to the backward limit z->lb; returns 0 otherwise. Nothing is
+     * rewritten: english_stem() uses a success to skip Steps 1b-5. */
+    z->ket = z->c; /* [, line 147 */
+    if (!(find_among_b(z, a_8, 8))) return 0; /* substring, line 147 */
+    z->bra = z->c; /* ], line 147 */
+    if (z->c > z->lb) return 0; /* atlimit, line 147 */
+    return 1;
+}
+
+static int r_exception1(struct SN_env * z) { /* Whole-word exceptions:
+     * succeeds when an a_9 entry matches forwards from the cursor and the
+     * match ends exactly at z->l. Results 1..11 replace the word with a
+     * fixed stem; entries with result -1 select no case, so the word is
+     * left as it is. Returns 1 on a match, 0 otherwise. */
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1: /* "skis" -> "ski" */
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2: /* "skies" -> "sky" */
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3: /* "dying" -> "die" */
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4: /* "lying" -> "lie" */
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5: /* "tying" -> "tie" */
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6: /* "idly" -> "idl" */
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7: /* "gently" -> "gentl" */
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8: /* "ugly" -> "ugli" */
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9: /* "early" -> "earli" */
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10: /* "only" -> "onli" */
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11: /* "singly" -> "singl" */
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+
+static int r_postlude(struct SN_env * z) { /* Postlude: undoes the prelude's
+     * marking. Returns 0 at once when Y_found (z->B[0]) is clear; otherwise
+     * scans forwards, turning every 'Y' back into 'y', restores the cursor
+     * of the last unsuccessful round and returns 1. */
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1: /* no 'Y' here: move one symbol on, or stop at the limit */
+            z->c = c;
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+extern int english_stem(struct SN_env * z) { /* Entry point: stems the word
+     * held in z in place. If r_exception1() recognises the whole word, that
+     * is the result. Otherwise, provided at least 3 symbols lie between the
+     * cursor and z->l, it runs r_prelude() and r_mark_regions() forwards,
+     * then, working backwards from the end, r_Step_1a() followed by either
+     * r_exception2() or Steps 1b, 1c, 2, 3, 4 and 5, and finally
+     * r_postlude() forwards. Each step is wrapped so that its failure is
+     * ignored and the cursor is restored. Returns 0 only for the
+     * fewer-than-3-symbols case (no step is run); returns 1 otherwise. */
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb; /* leave backward mode: cursor back to where the backward pass was bounded */
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+extern struct SN_env * english_create_env(void)
+{   /* The English stemmer needs no string variables, two integers (the
+     * p1 and p2 marks) and one flag (Y_found). */
+    struct SN_env * env = SN_create_env(0, 2, 1);
+    return env;
+}
+
+extern void english_close_env(struct SN_env * z)
+{   /* Nothing stemmer-specific to release: defer to the generic teardown. */
+    SN_close_env(z);
+    return;
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * english_create_env(void); /* new environment sized for the English stemmer */
+extern void english_close_env(struct SN_env * z); /* release an environment from english_create_env() */
+
+extern int english_stem(struct SN_env * z); /* stem the current word of z in place */
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+#define HEAD (2*sizeof(int)) /* bytes taken by the two ints read by SIZE and CAPACITY below; parenthesised so it can be used inside larger expressions */
+
+#define SIZE(p)        (((int *)(p))[-1]) /* int stored directly before p; still usable as an lvalue */
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    (((int *)(p))[-2]) /* int stored two ints before p; still usable as an lvalue */
+
+struct among /* one entry of a table searched by find_among() / find_among_b() */
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    int (* function)(struct SN_env *); /* optional routine attached to the entry; 0 in every table visible here - how the search uses it is defined in the runtime, TODO confirm */
+};
+
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point: stems the word held in z in place. */
+extern int russian_stem(struct SN_env * z);
+/* File-local routines, one per suffix class; each returns 1 when it
+ * succeeded and 0 when it did not apply. */
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Environment constructor / destructor for this stemmer. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Suffix strings for table a_0, which r_perfective_gerund searches
+ * backwards from the end of the word.  The strings carry no terminator;
+ * their length is stored in the table.
+ * NOTE(review): the byte values look like KOI8-R Cyrillic - confirm the
+ * encoding the caller is expected to supply. */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+/* Each entry is { length, string, substring_i, result, function }:
+ * substring_i is the index of the longest entry that is a suffix of this
+ * one (-1 for none), result is the value the lookup returns (the case
+ * label used by the caller), and function is 0 throughout this table. */
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/* Suffix strings for table a_1, searched backwards by r_adjective. */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+/* Entries are { length, string, substring_i, result, function }.  Every
+ * entry here yields result 1 and has no substring link (-1). */
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/* Suffix strings for table a_2, searched backwards by r_adjectival after
+ * r_adjective has already removed an ending. */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+/* Entries are { length, string, substring_i, result, function }.  In
+ * r_adjectival, result 1 requires a preceding s_2 or s_3 symbol before
+ * deleting, while result 2 deletes unconditionally. */
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* Suffix strings for table a_3, searched backwards by r_reflexive. */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+/* Entries are { length, string, substring_i, result, function }. */
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/* Suffix strings for table a_4, searched backwards by r_verb. */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+/* Entries are { length, string, substring_i, result, function }.  In
+ * r_verb, result 1 requires a preceding s_4 or s_5 symbol before deleting,
+ * while result 2 deletes unconditionally. */
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/* Suffix strings for table a_5, searched backwards by r_noun. */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+/* Entries are { length, string, substring_i, result, function }.  Every
+ * entry yields result 1; substring_i links a longer ending to the shorter
+ * ending it contains, so a lookup can fall back to it. */
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* Suffix strings for table a_6, searched backwards by r_derivational. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+/* Entries are { length, string, substring_i, result, function }. */
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Suffix strings for table a_7, searched backwards by r_tidy_up. */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+/* Entries are { length, string, substring_i, result, function }.  The
+ * result (1, 2 or 3) selects the matching case in r_tidy_up. */
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Membership bitmap for the grouping "v"; r_mark_regions tests it over the
+ * code range 192..220, bit (code - 192) marking a member.
+ * NOTE(review): presumably the vowel set of the Snowball Russian
+ * algorithm - confirm against the .sbl source. */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* One-symbol literals passed to eq_s_b by the routines below.  The
+ * generator emits one array per use, hence the repeated values. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the two region marks used by the stemmer and store them in
+ * z->I[0] (called pV in the generated comments) and z->I[1] (p2).
+ *
+ * Both marks default to the end of the text (z->l).  The cursor then
+ * scans forward: I[0] is set just past the first symbol that is in g_v;
+ * the scan continues past a symbol not in g_v, then one in g_v, then one
+ * not in g_v, and I[1] is set there.  If the text ends during any scan the
+ * marks found so far are kept and the rest stay at z->l.
+ *
+ * The cursor is restored before returning.  Always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 100 */
+        /* Each loop below: try the test at the cursor (it advances the
+         * cursor itself on success); otherwise step one symbol forward,
+         * giving up via lab0 once the end of the text is reached. */
+        while(1) { /* gopast, line 101 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+            break;
+        lab1:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[0] = z->c; /* setmark pV, line 101 */
+        while(1) { /* gopast, line 101 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+            break;
+        lab2:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+            break;
+        lab3:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+            break;
+        lab4:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 102 */
+    lab0:
+        /* put the cursor back where it was on entry */
+        z->c = c;
+    }
+    return 1;
+}
+
+/* Region test: returns 1 when the cursor lies at or beyond the mark in
+ * z->I[1], and 0 otherwise.  Nothing in z is modified. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/* Remove an ending listed in a_0 from before the cursor.
+ *
+ * Endings with result 1 are deleted only when preceded by the symbol s_0
+ * or s_1; endings with result 2 are deleted unconditionally.  Returns 1
+ * when an ending was handled and 0 when nothing matched or the required
+ * preceding symbol was absent. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c;                      /* slice ends at the cursor */
+    found = find_among_b(z, a_0, 9);
+    if (found == 0) return 0;
+    z->bra = z->c;                      /* slice starts where the match began */
+
+    if (found == 1) {
+        int back = z->l - z->c;
+        if (!eq_s_b(z, 1, s_0)) {
+            /* first alternative failed: rewind and try the second */
+            z->c = z->l - back;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z);
+    } else if (found == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Delete an ending listed in a_1 from before the cursor.  Returns 1 when
+ * one was found, 0 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c;
+    found = find_among_b(z, a_1, 26);
+    if (found == 0) return 0;
+    z->bra = z->c;
+    if (found == 1) slice_del(z);
+    return 1;
+}
+
+/* Remove an adjective ending (via r_adjective) and then, optionally, a
+ * further ending from table a_2.
+ *
+ * Returns 0 only when r_adjective fails.  The a_2 step is a "try": if it
+ * does not apply, the routine still returns 1. */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m; goto lab0; }
+            case 1:
+                /* result 1: delete only if preceded by s_2 or s_3 */
+                {   int m = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m;
+                    /* NOTE(review): the m in the restore below is the inner
+                     * (shadowing) m, so the cursor returns to the start of
+                     * the a_2 match rather than to the start of the "try".
+                     * The only caller, russian_stem, resets the cursor after
+                     * this routine returns. */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                /* result 2: delete unconditionally */
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Delete an ending listed in a_3 from before the cursor.  Returns 1 when
+ * one was found, 0 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c;
+    found = find_among_b(z, a_3, 2);
+    if (found == 0) return 0;
+    z->bra = z->c;
+    if (found == 1) slice_del(z);
+    return 1;
+}
+
+/* Remove an ending listed in a_4 from before the cursor.
+ *
+ * Endings with result 1 are deleted only when preceded by the symbol s_4
+ * or s_5; endings with result 2 are deleted unconditionally.  Returns 1
+ * when an ending was handled and 0 when nothing matched or the required
+ * preceding symbol was absent. */
+static int r_verb(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c;
+    found = find_among_b(z, a_4, 46);
+    if (found == 0) return 0;
+    z->bra = z->c;
+
+    if (found == 1) {
+        int back = z->l - z->c;
+        if (!eq_s_b(z, 1, s_4)) {
+            /* first alternative failed: rewind and try the second */
+            z->c = z->l - back;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z);
+    } else if (found == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Delete an ending listed in a_5 from before the cursor.  Returns 1 when
+ * one was found, 0 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c;
+    found = find_among_b(z, a_5, 36);
+    if (found == 0) return 0;
+    z->bra = z->c;
+    if (found == 1) slice_del(z);
+    return 1;
+}
+
+/* Delete an ending listed in a_6, provided the start of the ending passes
+ * the r_R2 region test.  Returns 1 on success; 0 when no ending matched or
+ * the region test failed. */
+static int r_derivational(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c;
+    found = find_among_b(z, a_6, 2);
+    if (found == 0) return 0;
+    z->bra = z->c;
+    if (!r_R2(z)) return 0;             /* ending must start inside R2 */
+    if (found == 1) slice_del(z);
+    return 1;
+}
+
+/* Final clean-up driven by table a_7.
+ *
+ *   result 1: delete the ending, then require two s_6/s_7 symbols before
+ *             it and delete the one nearest the cursor;
+ *   result 2: the matched symbol must be preceded by s_8; then delete it;
+ *   result 3: delete the matched symbol.
+ *
+ * Returns 1 on success, 0 when nothing matched or a required symbol was
+ * missing (deletions already performed are not undone). */
+static int r_tidy_up(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c;
+    found = find_among_b(z, a_7, 4);
+    if (found == 0) return 0;
+    z->bra = z->c;
+
+    if (found == 1) {
+        slice_del(z);
+        z->ket = z->c;
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c;
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z);
+    } else if (found == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z);
+    } else if (found == 3) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/* Stem the word held in z, in place.
+ *
+ * Steps, as the code below performs them:
+ *   1. r_mark_regions computes the marks z->I[0] and z->I[1].
+ *   2. Processing switches to backward mode (cursor at z->l) and the
+ *      backward limit z->lb is raised to z->I[0] for the remaining steps.
+ *   3. Either r_perfective_gerund succeeds, or an optional r_reflexive is
+ *      followed by the first of r_adjectival / r_verb / r_noun to succeed.
+ *   4. A trailing s_9 symbol is deleted if present.
+ *   5. r_derivational and r_tidy_up are each attempted once.
+ *
+ * Returns 0 if the cursor is before z->I[0] at step 2, otherwise 1.  On
+ * the success path z->lb is restored and the cursor is left at z->lb. */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        /* m3 keeps the old backward limit so it can be restored at the end */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        /* Each nested block declares its own m, shadowing the outer one;
+         * a restore "z->c = z->l - m" refers to the innermost m in scope. */
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            /* end of the "do": the cursor returns to the end of the word */
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3;
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create the environment for this stemmer; the three counts handed to
+ * SN_create_env are 0, 2 and 0. */
+extern struct SN_env * russian_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 0);
+    return env;
+}
+
+/* Release an environment obtained from russian_create_env. */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create / destroy the stemming environment used by russian_stem. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Stem the word held in z, in place. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* unless(C) stmt;  executes stmt when C is false. */
+#define unless(C) if(!(C))
+
+/* Capacity, and initial size, of a buffer returned by create_s. */
+#define CREATE_SIZE 1
+
+/* Allocate a new symbol buffer.
+ *
+ * The allocation holds a header of HEAD bytes followed by CREATE_SIZE + 1
+ * symbols; the returned pointer addresses the first symbol, with the
+ * header just before it.  Capacity and size are both set to CREATE_SIZE.
+ *
+ * Returns a null pointer when malloc fails.  Previously the result was
+ * used unchecked, so a failed allocation led to a write through an
+ * invalid pointer while filling in the header. */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == 0) return 0;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Release a buffer obtained from create_s or increase_size.
+ *
+ * p points HEAD bytes past the start of the allocation, so the pointer
+ * handed to free() is stepped back by HEAD.  A null p is ignored:
+ * stepping a null pointer back and freeing the result is undefined. */
+extern void lose_s(symbol * p)
+{
+    if (p == 0) return;
+    free((char *) p - HEAD);
+}
+
+/* Forward test: is the symbol at the cursor a member of grouping s?
+ *
+ * s is a bitmap indexed by (symbol - min); only codes in min..max can be
+ * members.  On success the cursor advances one symbol and 1 is returned;
+ * otherwise the cursor is unchanged and 0 is returned. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int code;
+    int bit;
+
+    if (z->c >= z->l) return 0;         /* nothing left to read */
+    code = z->p[z->c];
+    if (code > max) return 0;
+    bit = code - min;
+    if (bit < 0) return 0;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: is the symbol just before the cursor a member of
+ * grouping s?  Same bitmap convention as in_grouping.  On success the
+ * cursor moves back one symbol and 1 is returned; otherwise the cursor is
+ * unchanged and 0 is returned. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int code;
+    int bit;
+
+    if (z->c <= z->lb) return 0;        /* already at the backward limit */
+    code = z->p[z->c - 1];
+    if (code > max) return 0;
+    bit = code - min;
+    if (bit < 0) return 0;
+    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: is the symbol at the cursor NOT a member of grouping s?
+ *
+ * A symbol outside min..max, or whose bit in s is clear, counts as "not a
+ * member".  On success the cursor advances one symbol and 1 is returned;
+ * otherwise the cursor is unchanged and 0 is returned. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int code;
+
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code <= max)
+    {   int bit = code - min;
+        /* a set bit means the symbol IS in the grouping, so the test fails */
+        if (bit >= 0 && (s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward test: is the symbol just before the cursor NOT a member of
+ * grouping s?  On success the cursor moves back one symbol and 1 is
+ * returned; otherwise the cursor is unchanged and 0 is returned. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int code;
+
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code <= max)
+    {   int bit = code - min;
+        /* a set bit means the symbol IS in the grouping, so the test fails */
+        if (bit >= 0 && (s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward test: does the symbol at the cursor lie in min..max?  On
+ * success the cursor advances and 1 is returned; otherwise 0. */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int code;
+
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code < min || code > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: does the symbol before the cursor lie in min..max?  On
+ * success the cursor moves back and 1 is returned; otherwise 0. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int code;
+
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code < min || code > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: does the symbol at the cursor lie OUTSIDE min..max?  On
+ * success the cursor advances and 1 is returned; otherwise 0. */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int code;
+
+    if (z->c >= z->l) return 0;
+    code = z->p[z->c];
+    if (code >= min && code <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: does the symbol before the cursor lie OUTSIDE min..max?
+ * On success the cursor moves back and 1 is returned; otherwise 0. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int code;
+
+    if (z->c <= z->lb) return 0;
+    code = z->p[z->c - 1];
+    if (code >= min && code <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward literal match: if the s_size symbols starting at the cursor
+ * equal s, step the cursor over them and return 1; otherwise return 0
+ * with the cursor unchanged. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->l - z->c < s_size) return 0;     /* not enough text left */
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward literal match: if the s_size symbols ending at the cursor equal
+ * s, step the cursor back over them and return 1; otherwise return 0 with
+ * the cursor unchanged. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->c - z->lb < s_size) return 0;    /* not enough text before cursor */
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* eq_s for a symbol buffer: the length is taken from SIZE(p). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int n = SIZE(p);
+    return eq_s(z, n, p);
+}
+
+/* eq_s_b for a symbol buffer: the length is taken from SIZE(p). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int n = SIZE(p);
+    return eq_s_b(z, n, p);
+}
+
+/* Look up the text starting at the cursor in the table v (v_size entries).
+ *
+ * Phase 1 is a binary search over v, comparing the text with each probed
+ * entry symbol by symbol.  NOTE(review): this presumes v is sorted in
+ * symbol order - confirm against the table generator.
+ * Phase 2 starts from the entry the search settled on and follows the
+ * substring_i links until it finds an entry that matched in full.
+ *
+ * On a match the cursor is placed just past the matched string and the
+ * entry's result is returned; if the entry has a function, that function
+ * must also return non-zero.  Returns 0 when no entry qualifies (the
+ * cursor may then have been moved by an attempted entry). */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    /* number of leading symbols known to match at the lower bound i and
+       at the upper bound j respectively */
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        /* resume comparing after the symbols both bounds already share */
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        /* entry i matched in full if all its symbols are in common_i */
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* reset the cursor after the call to w->function */
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        /* fall back to the linked shorter entry, if any */
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/* Look up the text ending at the cursor in the table v (v_size entries).
+ *
+ * Works like find_among, but compares from the symbol just before the
+ * cursor towards z->lb, and each entry from its last symbol towards its
+ * first.  On a match the cursor is placed at the start of the matched
+ * string and the entry's result is returned; 0 means no entry qualified. */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    /* number of trailing symbols known to match at bounds i and j */
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        /* walk entry k from its end, skipping symbols already matched */
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            /* one extra round so that v[0] itself gets compared */
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        /* entry i matched in full if all its symbols are in common_i */
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* reset the cursor after the call to w->function */
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        /* fall back to the linked shorter entry, if any */
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Move the contents of p into a newly allocated buffer with capacity
+ * n + 20, release p, and return the new buffer.
+ *
+ * CAPACITY(p) symbols are copied.  The size field of the new buffer is
+ * not set here; the callers in this file set it afterwards.
+ * NOTE(review): the malloc result is not checked. */
+extern symbol * increase_size(symbol * p, int n)
+{
+    int new_size = n + 20;
+    char * raw = (char *) malloc(HEAD + (new_size + 1) * sizeof(symbol));
+    symbol * q = (symbol *) (HEAD + raw);
+
+    CAPACITY(q) = new_size;
+    memmove(q, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return q;
+}
+
+/* Replace the symbols in z->p[c_bra..c_ket) with the s_size symbols at s.
+ *
+ * When the length changes, the tail of the text is shifted, the buffer is
+ * grown if required, and z->l and the cursor are adjusted: a cursor at or
+ * after c_ket moves with the tail, a cursor inside the replaced span
+ * snaps to c_bra.  Returns the change in length (may be negative). */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{
+    int adjustment = s_size - (c_ket - c_bra);
+    int len = SIZE(z->p);
+
+    if (adjustment != 0)
+    {
+        if (adjustment + len > CAPACITY(z->p))
+            z->p = increase_size(z->p, adjustment + len);
+        /* slide everything after the replaced span to its new place */
+        memmove(z->p + c_ket + adjustment, z->p + c_ket, (len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, adjustment + len);
+        z->l += adjustment;
+        if (z->c >= c_ket)
+            z->c += adjustment;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0)
+        memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return adjustment;
+}
+
+/* Sanity-check the slice markers: 0 <= bra <= ket <= l <= SIZE(z->p).
+ * On violation a message goes to stderr, debug() dumps the buffer, and
+ * the process exits with status 1. */
+static void slice_check(struct SN_env * z)
+{
+    if (z->bra < 0 ||
+        z->bra > z->ket ||
+        z->ket > z->l ||
+        z->l > SIZE(z->p))      /* this last test could be removed */
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+/* Replace the current slice z->bra..z->ket with the s_size symbols at s,
+ * after validating the slice markers. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice with the contents of the symbol buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int n = SIZE(p);
+    slice_from_s(z, n, p);
+}
+
+/* Delete the current slice, i.e. replace it with zero symbols. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, (symbol *) 0);
+}
+
+/* Replace z->p[bra..ket) with the s_size symbols at s, then shift the
+ * slice markers z->bra / z->ket by the length change when they lie at or
+ * after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int shift = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += shift;
+    if (z->ket >= bra) z->ket += shift;
+}
+
+/* insert_s for a symbol buffer: the length is taken from SIZE(p). */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    int shift = replace_s(z, bra, ket, SIZE(p), p);
+
+    if (z->bra >= bra) z->bra += shift;
+    if (z->ket >= bra) z->ket += shift;
+}
+
+/* Copy the current slice z->bra..z->ket into p and set p's size to the
+ * slice length.  p is grown when its capacity is too small, so callers
+ * must use the returned pointer. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int len;
+
+    slice_check(z);
+    len = z->ket - z->bra;
+    if (CAPACITY(p) < len)
+        p = increase_size(p, len);
+    memmove(p, z->p + z->bra, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into p and set p's size to z->l.
+ * p is grown when its capacity is too small, so callers must use the
+ * returned pointer. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int len = z->l;
+
+    if (CAPACITY(p) < len)
+        p = increase_size(p, len);
+    memmove(p, z->p, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Dump the buffer z->p to stdout with position markers interleaved:
+ * '{' at lb, '[' at bra, '|' at the cursor, ']' at ket, '}' at l.
+ * Zero symbols are shown as '#'.  When number >= 0 a prefix with number,
+ * line_count and the buffer size is printed first; the closing quote and
+ * newline are always printed. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{
+    int pos = 0;
+    int limit = SIZE(z->p);
+
+    if (number >= 0)
+        printf("%3d (line %4d): [%d]'", number, line_count,limit);
+
+    /* run to limit inclusive so markers sitting at the very end show up */
+    while (pos <= limit)
+    {
+        if (z->lb == pos) printf("{");
+        if (z->bra == pos) printf("[");
+        if (z->c == pos) printf("|");
+        if (z->ket == pos) printf("]");
+        if (z->l == pos) printf("}");
+        if (pos < limit)
+        {   int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+        pos++;
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of tsearch2.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case the NUL-terminated string str in place with tolower() and
+ * return str.
+ */
+char*
+lowerstr(char *str) {
+   char *cur;
+
+   for(cur=str; *cur != '\0'; cur++)
+       *cur = tolower(*(unsigned char*)cur);
+   return str;
+}
+
+/*
+ * Release a stop-word list: free up to s->len entries of s->stop (stopping
+ * early at a NULL entry), free the array itself, and zero the whole
+ * StopList structure.
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       /*
+        * Test the remaining count before dereferencing: readstoplist()
+        * does not NULL-terminate the array, so *ptr must not be read
+        * once s->len entries have been consumed.
+        */
+       while( s->len >0 && *ptr ) {
+           free(*ptr);
+           ptr++; s->len--;
+       }
+       /* the array is freed exactly once, after its entries */
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Load a stop-word list into s from the file whose name is given by in.
+ *
+ * When in is NULL or empty the list is left empty (s->len = 0, s->stop =
+ * NULL).  Otherwise the file is read line by line: the trailing newline is
+ * removed, empty lines are skipped, and each remaining line is strdup'ed
+ * into a malloc'ed array that grows by doubling.  If s->wordop is set it
+ * is applied to every stored word.  The result is not sorted here.
+ *
+ * Raises ERROR through elog() when the file cannot be opened or memory
+ * runs out.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t buflen=strlen(buf);
+
+           /*
+            * Drop the newline only when there is one: the last line of
+            * the file may lack it, and chopping unconditionally would
+            * cut off a real character.
+            */
+           if ( buflen>0 && buf[buflen-1]=='\n' )
+               buf[--buflen] = '\0';
+           if ( buflen==0 ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /*
+                    * The words read so far live only in the local array
+                    * (s->stop is assigned at the very end), so release
+                    * them here before reporting the error.
+                    */
+                   while ( s->len > 0 )
+                       free(stop[--(s->len)]);
+                   free(stop);
+                   freestoplist(s);
+                   fclose(hin); 
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+    
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               /* same cleanup as above: free the partially built list */
+               while ( s->len > 0 )
+                   free(stop[--(s->len)]);
+               free(stop);
+               freestoplist(s);
+               fclose(hin); 
+               elog(ERROR,"Not enough memory");
+           }
+           if ( s->wordop ) 
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++; 
+       }
+       fclose(hin);
+       pfree(filename); 
+   }
+   s->stop=stop;
+} 
+
+/*
+ * Comparator for qsort()/bsearch() over an array of char*: a and b point
+ * to array elements, which are ordered with strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   const char *left = *(char**)a;
+   const char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/* Sort the stop-word array with comparestr(); an empty list is left alone. */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is found in the stop-word list by bsearch() with
+ * comparestr(), which requires the list to be sorted by the same
+ * comparator (see sortstoplist()).  If s->wordop is set it is applied to
+ * key first.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop ) 
+       key=(*(s->wordop))(key);
+   if ( !s->stop || s->len<=0 )
+       return false;
+   return ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) != NULL ) ? true : false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+/*
+ * Saved SPI plans, prepared on first use and then reused, plus the id of
+ * the current configuration.
+ */
+/* get_currcfg(): pg_ts_cfg oid by locale */
+static void *plan_getcfg_bylocale=NULL;
+/* init_cfg(): parser name of a configuration */
+static void *plan_getcfg=NULL;
+/* init_cfg(): token type -> dictionary names mapping */
+static void *plan_getmap=NULL;
+/* name2id_cfg(): pg_ts_cfg oid by configuration name */
+static void *plan_name2id=NULL;
+/* 0 until set by get_currcfg() or set_curcfg() */
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the tsearch configuration whose pg_ts_cfg oid is id.
+ *
+ * Through SPI it first fetches the configuration's parser name, then the
+ * rows mapping token ids to arrays of dictionary names (ordered by token
+ * id, descending, so the first row sizes cfg->map).  The names are copied
+ * into TopMemoryContext because they are still needed after SPI_finish(),
+ * where they are resolved with name2id_prs() and name2id_dict() and then
+ * freed.  cfg->map and the dict_id arrays are malloc'ed.
+ *
+ * Every failure is reported with ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       /* keep a copy of the parser name outside the SPI memory */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second query: $1 is the parser name (text), $2 stays the cfg oid */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /* rows arrive with the largest token id first, which sizes the map */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull now refers to the dictionary-name array (column 2) */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* store copies of the dictionary names for now; ids come later */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every stored dictionary name with the dictionary's id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/* Process-local cache of loaded configurations (see findcfg()). */
+typedef struct {
+   /* entry returned by the most recent findcfg() lookup, or NULL */
+   TSCfgInfo   *last_cfg;
+   /* number of entries of list in use */
+   int     len;
+   /* number of entries allocated for list */
+   int     reallen;
+   /* realloc'ed array, kept sorted by id with comparecfg() */
+   TSCfgInfo   *list;
+   /* configuration name -> id cache used by name2id_cfg() */
+   SNMap       name2id_map;
+} CFGList;
+
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: free the name-to-id map, every
+ * configuration's dictionary arrays and token map, the list itself, and
+ * zero CList.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               /* one dict_id array per token type of this configuration */
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * Comparator for qsort()/bsearch() over TSCfgInfo entries, ordering by id.
+ * The ids are compared explicitly instead of subtracted: Oid is unsigned,
+ * so a difference squeezed into an int can carry the wrong sign.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached TSCfgInfo for configuration id, loading it with
+ * init_cfg() on first use.  The returned pointer refers into CList.list,
+ * which is realloc'ed and re-sorted whenever a configuration is added.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   /* fast path: same configuration as the previous lookup */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* search the configurations loaded so far */
+   if ( CList.len != 0 ) {
+       TSCfgInfo probe;
+       probe.id=id;
+       CList.last_cfg = bsearch(&probe, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg )
+           return CList.last_cfg;
+   }
+
+   /* not loaded yet: make room, load it into the first free slot */
+   if ( CList.len==CList.reallen ) {
+       int newlen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       TSCfgInfo *grown = (TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*newlen);
+       if ( !grown ) 
+           ts_error(ERROR,"No memory");
+       CList.list=grown;
+       CList.reallen=newlen;
+   }
+   CList.last_cfg=CList.list + CList.len;
+   init_cfg(id, CList.last_cfg);
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   /* sorting may have moved the new entry, so look it up again */
+   return findcfg(id);
+}
+
+
+/*
+ * Translate a configuration name into its pg_ts_cfg oid.
+ *
+ * CList.name2id_map is consulted first; on a miss the oid is read from
+ * pg_ts_cfg through a saved SPI plan and added to the map.  Raises ERROR
+ * when the plan cannot be prepared or executed, when no row matches, or
+ * when the oid is null.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid id;
+
+   /* already cached? */
+   id=findSNMap_t( &(CList.name2id_map), name );
+   if ( id ) 
+       return id;
+   
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( !plan_name2id ) 
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed <= 0 )
+       elog(ERROR, "No tsearch config");
+   else {
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull ) 
+           elog(ERROR, "Null id for tsearch config");
+   }
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(CList.name2id_map), name, id );
+   return id;
+}
+
+
+/*
+ * Parse buf (buflen bytes) with the parser of cfg and append the
+ * normalized words to prs.
+ *
+ * For each token the parser returns, the dictionaries mapped to its type
+ * are tried in order; the first one returning a non-NULL array ends the
+ * search.  prs->pos is then advanced and every returned string is stored
+ * in prs->words (the strings are kept, only the array is freed).  Tokens
+ * whose type has no map entry are skipped.  Raises ERROR when a token is
+ * MAXSTRLEN bytes or longer.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser signals the end of input with type 0 */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one token to prs->words: a zeroed HLWORD carrying the token type
+ * and a palloc'ed copy of the buflen bytes at buf (no terminating NUL is
+ * stored).  The array is doubled until there is room.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   slot = prs->words + prs->curwords;
+   memset( slot, 0, sizeof(HLWORD) ); 
+   slot->type = (uint8)type;
+   slot->len = buflen; 
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;    
+}
+
+/*
+ * Link the most recently added word of prs to the query items equal to
+ * the normalized form buf (buflen bytes).
+ *
+ * The first matching VAL item is stored in the word itself.  For each
+ * further match a copy of the word is appended with repeated=1 and its
+ * own item; the copy shares the original's word pointer.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* make room for the worst case: one extra entry per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the pointer only now: repalloc() above may move prs->words,
+    * which would leave an earlier pointer dangling.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Parse buf (buflen bytes) with the parser of cfg for headline
+ * generation.
+ *
+ * Every token is appended to prs with hladdword().  Then the dictionaries
+ * mapped to its type are tried in order; for the first one returning a
+ * non-NULL array each normalized form is matched against query with
+ * hlfinditem() and freed.  Tokens whose type has no map entry are stored
+ * but not matched.  Raises ERROR when a token is MAXSTRLEN bytes or
+ * longer.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser signals the end of input with type 0 */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from the words collected in prs.
+ *
+ * Words flagged `in` (and neither `skip` nor `repeated`) are emitted:
+ * a single space when `replace` is set, otherwise the word itself,
+ * wrapped in prs->startsel / prs->stopsel when `selected`.  The word
+ * buffers of all non-repeated entries are pfree'd along the way, so prs
+ * cannot be rendered twice.  Returns a palloc'ed text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int alloclen=128;
+   text *out=(text*)palloc( alloclen );
+   char *dst=((char*)out) + VARHDRSZ;
+   int idx;
+
+   for(idx=0; idx<prs->curwords; idx++) {
+       HLWORD *wrd=prs->words + idx;
+
+       /* grow the buffer until this word plus both markers fit */
+       while ( wrd->len + prs->stopsellen + prs->startsellen + (dst - ((char*)out)) >= alloclen ) {
+           int used = dst - ((char*)out);
+           alloclen*= 2;
+           out = (text *) repalloc(out, alloclen);
+           dst=((char*)out) + used;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace ) {
+               *dst++=' ';
+           } else {
+               if (wrd->selected) {
+                   memcpy(dst,prs->startsel,prs->startsellen);
+                   dst+=prs->startsellen;
+               }
+               memcpy(dst,wrd->word,wrd->len);
+               dst+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(dst,prs->stopsel,prs->stopsellen);
+                   dst+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries share the word buffer of their original */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+   }
+
+   VARATT_SIZEP(out)=dst - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the id of the current configuration.
+ *
+ * When none has been chosen yet (current_cfg_id is 0) it is looked up in
+ * pg_ts_cfg by the LC_CTYPE locale reported by setlocale() and remembered
+ * in current_cfg_id.  Raises ERROR when the plan cannot be prepared or
+ * executed, or when no configuration matches the locale.
+ */
+int  
+get_currcfg(void) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1];
+   const char *localename;
+   bool isnull;
+   int rc;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* a NULL second argument only queries the locale */
+   localename = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)localename) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed <= 0 )
+       elog(ERROR,"Can't find tsearch config by locale");
+   else 
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+/*
+ * SQL-callable: make the configuration with the given oid the current
+ * one.  findcfg() is called first, so the configuration is loaded before
+ * current_cfg_id changes.
+ */
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+/*
+ * SQL-callable: make the configuration with the given name the current
+ * one, by resolving the name with name2id_cfg() and calling set_curcfg().
+ */
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Oid cfgid=name2id_cfg(name);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum( cfgid ) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+/* SQL-callable: return the oid reported by get_currcfg(). */
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   int cur=get_currcfg();
+
+   PG_RETURN_OID( cur );
+}
+
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+/*
+ * SQL-callable: report "TSearch cache cleaned" through ts_error() at
+ * NOTICE level.  No cache is touched here directly.
+ * NOTE(review): presumably ts_error() itself resets the caches before
+ * reporting -- confirm in its definition.
+ */
+Datum
+reset_tsearch(PG_FUNCTION_ARGS)
+{
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID();
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* The dictionaries mapped to one token type (an element of TSCfgInfo.map). */
+typedef struct {
+   /* number of entries in dict_id */
+   int len;
+   /* malloc'ed array; holds dictionary ids once init_cfg() has finished */
+   Datum   *dict_id;
+} ListDictionary;
+
+/* One loaded tsearch configuration, filled in by init_cfg(). */
+typedef struct {
+   /* oid of the configuration's row in pg_ts_cfg */
+   Oid id;
+   /* id of the configuration's parser, as returned by name2id_prs() */
+   Oid prs_id;
+   /* number of entries in map (largest mapped token id + 1) */
+   int len;
+   /* malloc'ed array indexed by token id */
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalized word produced by parsetext_v2(). */
+typedef struct {
+   /* length of word in bytes (strlen) */
+        uint16          len;
+   /*
+    * parsetext_v2() stores the word's position in pos.pos.
+    * NOTE(review): apos looks like a position array used together with
+    * alen by other code -- confirm.
+    */
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+   /* the normalized string returned by the dictionary */
+        char       *word;
+   /* set to 0 by parsetext_v2() */
+   uint32  alen;
+}       WORD;
+   
+/* Growable array of WORDs filled by parsetext_v2(). */
+typedef struct {
+   /* repalloc'ed array of words */
+        WORD       *words;
+   /* number of entries allocated for words */
+        int4            lenwords;
+   /* number of entries of words in use */
+        int4            curwords;
+   /* running position counter, advanced once per normalized token */
+   int4        pos;
+}       PRSTEXT;
+
+/* One token of the text a headline is generated from. */
+typedef struct {
+   /* length of word in bytes */
+        uint16    len;
+   /*
+    * Flags read by genhl(): a word is emitted when `in` is set and
+    * neither `skip` nor `repeated` is; `replace` emits a single space
+    * instead of the word; `selected` wraps the word in the start/stop
+    * markers.  `repeated` marks the extra copies made by hlfinditem(),
+    * which share the original's word buffer.
+    */
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   /* token type reported by the parser */
+   uint8   type;
+   /* palloc'ed copy of the token text, not NUL-terminated */
+        char      *word;
+   /* matching query item set by hlfinditem(), or NULL */
+   ITEM      *item;
+}       HLWORD;
+   
+/* Growable array of HLWORDs plus the markers genhl() puts around selected words. */
+typedef struct {
+   /* repalloc'ed array of words */
+        HLWORD       *words;
+   /* number of entries allocated for words */
+        int4            lenwords;
+   /* number of entries of words in use */
+        int4            curwords;
+   /* text copied before a selected word (startsellen bytes) */
+        char           *startsel;
+   /* text copied after a selected word (stopsellen bytes) */
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+
+/*
+ * Input function for tsstat.  The argument is never read: the result is
+ * always an empty accumulator made of the two header fields only.
+ */
+Datum
+tsstat_in(PG_FUNCTION_ARGS)
+{
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+
+/*
+ * Output function for tsstat.  There is no textual representation:
+ * every call reports "Unimplemented" at ERROR level.
+ */
+Datum
+tsstat_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Unimplemented");
+
+   /* kept so that the function has a return statement */
+   PG_RETURN_NULL();
+}
+
+/*
+ * Create or enlarge an array of WordEntry pointers.
+ *
+ * in   current array, or NULL when nothing has been allocated yet
+ * len  in/out: capacity of the array in elements
+ *
+ * A fresh array gets room for 8 pointers; an existing one is doubled.
+ * Returns the (possibly moved) array.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len )
+{
+   if ( in!=NULL && *len!=0 ) {
+       /* already allocated: double the capacity */
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*) * (*len) );
+   }
+
+   /* first use */
+   *len = 8;
+   return palloc( sizeof(WordEntry*) * (*len) );
+}
+
+/*
+ * Order a statistics entry against a tsvector word.
+ *
+ * Shorter words sort first; words of equal length are ordered by
+ * strncmp() over their bytes.  'stat' and 'txt' supply the string areas
+ * that a->pos and b->pos are offsets into.
+ * Returns <0, 0 or >0.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt)
+{
+   const char *statword;
+   const char *vecword;
+
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   statword = STATSTRPTR(stat) + a->pos;
+   vecword = STRPTR(txt) + b->pos;
+
+   return strncmp(statword, vecword, a->len);
+}
+
+/*
+ * Build a new tsstat holding every entry of 'stat' plus 'len' new words.
+ *
+ * stat   existing statistics (not modified, not freed)
+ * txt    tsvector the new words belong to
+ * entry  array of 'len' pointers to the WordEntry items of 'txt' to add
+ * len    number of elements in 'entry'
+ *
+ * The strings of 'stat' are copied first, so existing entries keep their
+ * 'pos' offsets; the strings of the new words are appended after them.
+ * Each new entry starts with ndoc=1 and nentry = number of positions of the
+ * word in 'txt' (1 when the word carries no position data).
+ *
+ * NOTE(review): the merge assumes 'entry' is ordered the same way
+ * compareStatWord() orders 'stat' and contains no word already present in
+ * 'stat'; ts_accum() appears to guarantee both -- confirm for other callers.
+ *
+ * Returns the newly palloc'd statistics.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* bytes needed for the strings of the new words */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings first (offsets stay valid), new strings go after them */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary-search its slot, then copy around it */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       /* entries before the insertion point */
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       /* entries after the insertion point */
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries (possibly nothing) */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* whatever is left of the new words; non-empty only when the old entries ran out */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+
+/*
+ * ts_accum(tsstat, tsvector): fold one tsvector into the statistics.
+ *
+ * Argument 0 is the accumulator (a NULL one is replaced by an empty
+ * header-only tsstat), argument 1 the document vector.  For every word of
+ * the vector already known, ndoc is incremented and nentry grows by the
+ * number of positions of the word (1 if it has none); this happens in place
+ * in the passed accumulator.  Words not yet known are collected and, if
+ * there are any, a new accumulator containing them is built by formstat()
+ * and returned; otherwise the passed accumulator itself is returned.
+ *
+ * The old accumulator is never freed here; ts_stat_sql() frees it when a
+ * different pointer comes back.
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt;
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /*
+    * A NULL vector contributes nothing.  This must be tested before
+    * detoasting: detoasting a NULL argument would dereference a null Datum.
+    */
+   if ( PG_ARGISNULL(1) )
+       PG_RETURN_POINTER(stat);
+
+   txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+
+   /* simple check of correctness */
+   if ( txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both ordered lists in parallel when the statistics
+    * are not much larger than the vector, otherwise binary-search each word
+    * of the vector in the statistics.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word is missing from the statistics: remember it */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* statistics exhausted: every remaining word is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* the loop above ends with StopLow < StopHigh only via the break on a match */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions in this file.
+ *   cur   index of the next StatEntry to hand out
+ *   stat  private copy of the statistics being returned; it is filled from a
+ *         tsstat and only ever accessed through the STAT* macros and ->size,
+ *         so it is declared with its real type
+ */
+typedef struct {
+   uint32  cur;
+   tsstat *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions of this file.
+ *
+ * While funcctx->multi_call_memory_ctx is the current memory context, this
+ * copies 'stat' into a fresh StatStorage (iteration starting at entry 0),
+ * hangs it on funcctx->user_fctx, and prepares the tuple slot and input
+ * metadata for the "statinfo" row type.  The previous memory context is
+ * restored before returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat)
+{
+   MemoryContext   callerctx;
+   TupleDesc       rowdesc;
+   StatStorage    *state;
+
+   callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* private copy of the statistics */
+   state = palloc( sizeof(StatStorage) );
+   state->cur = 0;
+   state->stat = palloc( stat->len );
+   memcpy(state->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)state;
+
+   /* result rows are of type "statinfo" */
+   rowdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(callerctx);
+}
+
+
+/*
+ * Produce the next result row, or (Datum)0 when all entries were returned.
+ *
+ * A row consists of the word itself followed by its ndoc and nentry
+ * counters printed as decimal text; the tuple is built from those three
+ * C strings.  When the entries are exhausted the StatStorage and its copy
+ * of the statistics are freed.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx)
+{
+   StatStorage *state = (StatStorage*)funcctx->user_fctx;
+   StatEntry   *item;
+   HeapTuple    row;
+   Datum        out;
+   char        *fields[3];
+   char         ndocbuf[16];
+   char         nentrybuf[16];
+
+   if ( state->cur >= state->stat->size ) {
+       /* nothing left: release the per-query state */
+       pfree(state->stat);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   item = STATPTR(state->stat) + state->cur;
+
+   /* the stored word is not NUL-terminated, so make a terminated copy */
+   fields[0] = palloc( item->len+1 );
+   memcpy( fields[0], STATSTRPTR(state->stat)+item->pos, item->len);
+   fields[0][item->len] = '\0';
+
+   sprintf(ndocbuf,"%d",item->ndoc);
+   fields[1] = ndocbuf;
+   sprintf(nentrybuf,"%d",item->nentry);
+   fields[2] = nentrybuf;
+
+   row = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   out = TupleGetDatum(funcctx->slot, row);
+
+   pfree(fields[0]);
+   state->cur++;
+   return out;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+
+/*
+ * ts_accum_finish(tsstat): set-returning function that hands out the
+ * entries of the given statistics one row per call.
+ */
+Datum
+ts_accum_finish(PG_FUNCTION_ARGS)
+{
+   FuncCallContext *ctx;
+   Datum            row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       /* copy the argument into per-query state */
+       ctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(ctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(ctx);
+   if ( row != (Datum)0 )
+       SRF_RETURN_NEXT(ctx, row);
+   SRF_RETURN_DONE(ctx);
+}
+
+/* Oid of the tsvector type; InvalidOid until get_ti_Oid() has looked it up. */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the Oid of the type named 'tsvector' in pg_type through SPI and
+ * store it in tiOid.  Must be called with an open SPI connection.
+ * Raises an ERROR if the query fails, returns no row, or yields InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is an unsigned row count, so a "< 0" test can never fire;
+    * check for "no row" before touching vals[0].
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL text 'txt' and accumulate statistics over its result.
+ *
+ * The query must return exactly one column of type tsvector; anything else
+ * raises an ERROR.  Rows are fetched through a cursor 100 at a time and each
+ * non-NULL value is folded into the statistics with ts_accum().
+ * Must be called with an open SPI connection.
+ *
+ * Returns the accumulated statistics (an empty header-only tsstat if the
+ * query produced no non-NULL values).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from an empty accumulator */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       /* one pass over the rows of the batch just fetched */
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum hands back a new object only when words were added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+
+/*
+ * ts_stat(text): set-returning function.  On the first call the SQL text
+ * given as argument is executed inside an SPI connection, statistics are
+ * gathered over its result and copied into per-query state; every call
+ * then returns one row of those statistics until they are exhausted.
+ */
+Datum
+ts_stat(PG_FUNCTION_ARGS)
+{
+   FuncCallContext *ctx;
+   Datum            row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *sql=PG_GETARG_TEXT_P(0);
+       tsstat  *collected;
+
+       ctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       collected = ts_stat_sql(sql);
+       PG_FREE_IF_COPY(sql,0);
+       /* must happen before SPI_finish(): 'collected' is copied here */
+       ts_setup_firstcall(ctx, collected );
+       SPI_finish();
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(ctx);
+   if ( row != (Datum)0 )
+       SRF_RETURN_NEXT(ctx, row);
+   SRF_RETURN_DONE(ctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one distinct word.
+ *   len     length of the word in bytes
+ *   pos     offset of the word from the start of the string area
+ *           (STATSTRPTR) of the owning tsstat
+ *   ndoc    number of accumulated tsvectors that contained the word
+ *   nentry  total number of occurrences; each tsvector adds its number of
+ *           positions for the word, or 1 if it stores none
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Accumulated word statistics, stored as one contiguous allocation:
+ * the two header fields, then 'size' StatEntry items, then the bytes of
+ * the words themselves (see the STAT* macros below).
+ *   len   total size of the object in bytes
+ *   size  number of StatEntry items
+ *   data  start of the variable-length part
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat.  All macro arguments are parenthesized so that
+ * expressions can be passed safely, and the accessors cast to tsstat (the
+ * type defined in this header) rather than to a type declared elsewhere.
+ */
+/* size of the fixed header: the len and size fields */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* total bytes needed for x entries and lenstr bytes of word data */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* first StatEntry of tsstat x */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* start of the word data of tsstat x (right after its last StatEntry) */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* number of bytes of word data stored in tsstat x */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+-- One row per dictionary, keyed by dict_name.
+-- In the rows inserted by this script dict_init and dict_lexize hold the
+-- pg_proc oids of the dictionary's *_init and *_lexize functions, and
+-- dict_initoption is either null or a text option (a stop-word file path
+-- for the stemmers).
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array holding each lexeme's position in the string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>            /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending position.
+ *
+ * Returns 0 for equal positions: a comparator that reports "greater" for
+ * both (a,b) and (b,a) is inconsistent, which qsort() does not permit.
+ * Duplicate positions are merged afterwards by uniquePos().
+ */
+static int
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ * When the same position occurs more than once, the largest weight is kept.
+ * Collection stops once MAXNUMPOS entries are stored or a stored position
+ * equals MAXENTRYPOS-1.
+ * Returns the number of entries left at the front of a[].
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *last = a;     /* last unique entry written so far */
+   int4 i;
+
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (i = 1; i < l; i++) {
+       WordEntryPos *cur = a + i;
+
+       if ( cur->pos == last->pos ) {
+           /* same position again: remember the larger weight */
+           if ( cur->weight > last->weight )
+               last->weight = cur->weight;
+           continue;
+       }
+
+       last++;
+       last->pos = cur->pos;
+       last->weight = cur->weight;
+       if ( last-a >= MAXNUMPOS-1 || last->pos == MAXENTRYPOS-1 )
+           break;
+   }
+   return last + 1 - a;
+}
+
+static char *BufferStr;
+/*
+ * qsort() comparator for WordEntryIN: shorter lexemes sort first, lexemes of
+ * equal length are ordered by strncmp() of their bytes.  The lexeme text is
+ * located through the file-level BufferStr pointer.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   WordEntryIN *ea = (WordEntryIN *) a;
+   WordEntryIN *eb = (WordEntryIN *) b;
+
+   if ( ea->entry.len != eb->entry.len )
+       return ( ea->entry.len > eb->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[ea->entry.pos],
+                  &BufferStr[eb->entry.pos],
+                  ea->entry.len);
+}
+
+/*
+ * Sort the l parsed entries in a[] and merge entries with identical lexemes.
+ *
+ * buf is the buffer the entries' pos/len refer to.  Each entry's position
+ * array keeps its element count in slot 0 (read as uint16); real positions
+ * start at slot 1.  Position lists of merged duplicates are concatenated and
+ * then cleaned by uniquePos().
+ *
+ * *outbuflen is increased by the space each surviving entry needs in the
+ * final tsvector: SHORTALIGN(len) for the lexeme plus, when it has positions,
+ * (count + 1) WordEntryPos-sized slots, the extra one being the count
+ * header that tsvector_in() copies together with the positions.  (For l == 1
+ * with positions the value is assigned rather than accumulated.)
+ *
+ * Returns the number of unique entries, stored at the front of a[].
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   res = a;
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
+       }
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;            /* compareentry() reads lexemes through this */
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* new lexeme: finish accounting for the previous one */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               /* +1: the count slot is stored in front of the positions */
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* duplicate lexeme carrying positions: fold them into res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* res had none yet: simply take over the duplicate's array */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+   /* account for the last unique entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+/* States of the gettoken_tsvector() scanner (kept in TI_IN_STATE.state). */
+#define WAITWORD   1 /* before a lexeme; blanks are skipped */
+#define WAITENDWORD 2 /* inside an unquoted lexeme */
+#define WAITNEXTCHAR   3 /* after a backslash: next character is taken literally */
+#define WAITENDCMPLX   4 /* inside a quoted lexeme, until the closing quote */
+#define WAITPOSINFO    5 /* after the closing quote: ':' starts position info */
+#define INPOSINFO  6 /* a digit starting a position number is required */
+#define WAITPOSDELIM   7 /* inside/after a position: more digits, weight letter, ',' or end */
+/*
+ * Make sure state->word can take the character at state->curpos plus one
+ * more: when that would reach state->len the buffer is doubled and curpos
+ * is re-based onto the reallocated buffer.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Scan the next lexeme from state->prsbuf.
+ *
+ * The lexeme is stored NUL-terminated in state->word (state->curpos points at
+ * the terminator); state->prsbuf is advanced as input is consumed.  A lexeme
+ * may be written bare or enclosed in single quotes, and a backslash makes the
+ * following character literal.
+ *
+ * When state->oprisdelim is false a ':' after the lexeme introduces a
+ * comma-separated list of positions, each optionally followed by a weight
+ * letter (a or '*' = 3, b = 2, c = 1, d = 0).  The list is returned in
+ * state->pos with the number of positions kept in slot 0 (as uint16);
+ * state->alen is non-zero only if such a list was read.
+ * When state->oprisdelim is true, ISOPERATOR() characters and ':' end the
+ * lexeme and no position list is parsed.
+ *
+ * Returns 1 if a lexeme was read, 0 at end of input; syntax problems are
+ * reported with elog(ERROR).
+ *
+ * Characters are cast to unsigned char before being handed to the <ctype.h>
+ * functions, whose behaviour is undefined for negative char values.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;   /* state to resume after an escaped character */
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1;
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* step over the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   /* first position of this lexeme: create the array */
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2;
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               /* remaining digits of the current number are simply skipped */
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+/*
+ * Input function of the tsvector type: converts the C string in argument 0
+ * to the internal representation.
+ *
+ * Lexemes delivered by gettoken_tsvector() are gathered in arr[] while their
+ * text is appended to tmpbuf; uniqueentry() then sorts and de-duplicates
+ * them, and finally everything is packed into a newly palloc'd tsvector
+ * (WordEntry array, followed by the lexeme strings and position lists).
+ * Raises an error for a lexeme of MAXSTRLEN bytes or more, or when the
+ * collected text exceeds MAXSTRPOS.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64; /* slots allocated in arr[] */
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256; /* bytes allocated for tmpbuf */
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false; /* let the scanner parse ':' position lists */
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       if (cur - tmpbuf > MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos; /* array allocated by the scanner; freed below */
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen); /* buflen is reused as the data-size accumulator */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in); /* offset now relative to the final string area */
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos)); /* count slot plus positions */
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * Returns the number of entries (the size field) of the tsvector argument.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before a detoasted copy may be released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+/*
+ * Output function of the tsvector type: builds the text form of argument 0.
+ *
+ * Every lexeme is written between single quotes, lexemes are separated by a
+ * blank, and a single quote inside a lexeme is preceded by a backslash.  A
+ * lexeme with positions is followed by ':' and the comma-separated position
+ * numbers, each with weight letter A, B or C for weights 3, 2, 1 (nothing is
+ * printed for weight 0).
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+       lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+       for (i = 0; i < out->size; i++) {
+               lenbuf += ptr[i].len*2 /*for escape */;
+               if ( ptr[i].haspos )
+                       lenbuf += 7*POSDATALEN(out, &(ptr[i])); /* 7 bytes reserved per printed position */
+       }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'')
+           {
+               int4        pos = curout - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf); /* one more byte for the backslash */
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0'); /* advance to the end of the number just printed */
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD items: by length first, then by strncmp() of
+ * the word bytes, and for identical words by position (never returning 0
+ * in that last case).
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   WORD       *wa = (WORD *) a;
+   WORD       *wb = (WORD *) b;
+   int         res;
+
+   if (wa->len != wb->len)
+       return (wa->len > wb->len) ? 1 : -1;
+
+   res = strncmp(wa->word, wb->word, wb->len);
+   if ( res != 0 )
+       return res;
+
+   return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l words in a[] and collapse repeated words into one entry.
+ *
+ * Each surviving entry gets a palloc'd pos.apos array whose slot 0 holds the
+ * number of positions and whose following slots hold the positions (clamped
+ * with LIMITPOS).  The word string of a dropped duplicate is pfree'd.  No
+ * more positions are added to an entry once it holds MAXNUMPOS-1 of them or
+ * its last position equals MAXENTRYPOS-1.
+ * Returns the number of unique entries, stored at the front of a[].
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *uniq,
+              *scan;
+   int         firstpos;
+
+   if (l != 1)
+       qsort((void *) a, l, sizeof(WORD), compareWORD);
+
+   /* the first element always opens a unique entry */
+   firstpos=LIMITPOS(a->pos.pos);      /* read before pos.apos is assigned */
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=firstpos;
+
+   if (l == 1)
+       return l;
+
+   uniq = a;
+   for (scan = a + 1; scan - a < l; scan++)
+   {
+       if (scan->len == uniq->len &&
+           strncmp(scan->word, uniq->word, uniq->len) == 0)
+       {
+           /* same word again: drop the string, keep the position */
+           pfree(scan->word);
+           if ( uniq->pos.apos[0] < MAXNUMPOS-1 && uniq->pos.apos[ uniq->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               if ( uniq->pos.apos[0]+1 >= uniq->alen ) {
+                   uniq->alen*=2;
+                   uniq->pos.apos=(uint16*)repalloc( uniq->pos.apos, sizeof(uint16)*uniq->alen );
+               }
+               uniq->pos.apos[ uniq->pos.apos[0]+1 ] = LIMITPOS(scan->pos.pos);
+               uniq->pos.apos[0]++;
+           }
+           continue;
+       }
+
+       /* a different word: start the next unique entry */
+       uniq++;
+       uniq->len = scan->len;
+       uniq->word = scan->word;
+       firstpos=LIMITPOS(scan->pos.pos);
+       uniq->alen=2;
+       uniq->pos.apos=(uint16*)palloc( sizeof(uint16)*uniq->alen );
+       uniq->pos.apos[0]=1;
+       uniq->pos.apos[1]=firstpos;
+   }
+
+   return uniq + 1 - a;
+}
+
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are first reduced by uniqueWORD(); then a tsvector is allocated
+ * and filled with one WordEntry per word, the word bytes, and (when the word
+ * has a position array) a uint16 count followed by the positions, all with
+ * weight 0.  The word strings, the position arrays and prs->words itself are
+ * pfree'd along the way.  Raises an error if the string area grows beyond
+ * MAXSTRPOS.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); /* count header + positions */
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       if (cur - str > MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           *(uint16*)cur = prs->words[i].pos.apos[0]; /* number of positions */
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text): parse the text (argument 1) with the
+ * configuration found for the id in argument 0 and return the resulting
+ * tsvector.  If parsing yields no words, a tsvector with size 0 is returned.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *doc = PG_GETARG_TEXT_P(1);
+   PRSTEXT     parsed;
+   tsvector   *result = NULL;
+   TSCfgInfo  *config = findcfg(PG_GETARG_INT32(0));
+
+   /* start with room for 32 words */
+   parsed.lenwords = 32;
+   parsed.curwords = 0;
+   parsed.pos = 0;
+   parsed.words = (WORD *) palloc(sizeof(WORD) * parsed.lenwords);
+
+   parsetext_v2(config, &parsed, VARDATA(doc), VARSIZE(doc) - VARHDRSZ);
+   PG_FREE_IF_COPY(doc, 1);
+
+   if (!parsed.curwords) {
+       /* nothing was collected: hand back a header-only value */
+       pfree(parsed.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   } else
+       result = makevalue(&parsed);
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg_name, text): translate the configuration name in argument
+ * 0 to its id with name2id_cfg() and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * to_tsvector(text): delegate to to_tsvector() using the configuration id
+ * returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * TEXTOID.  Returns the oid of the first such candidate, or InvalidOid if
+ * there is none.  The candidate list is pfree'd element by element.
+ */
+static Oid
+findFunc(char *fname) {
+   FuncCandidateList cand, next;
+   Oid found = InvalidOid;
+   List *fnames = makeList1(makeString(fname));
+
+   cand = FuncnameGetCandidates(fnames, 1);
+   freeList(fnames);
+
+   for (; cand != NULL; cand = next) {
+       next = cand->next;
+       /* keep only the first match, but still walk on to free the rest */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+   }
+
+   return found;
+}
+
+/*
+ * Trigger function that fills a tsvector column from text columns.
+ *
+ * Must be fired as a row-level BEFORE INSERT or UPDATE trigger.  The first
+ * trigger argument names the tsvector column; every further argument is
+ * either the name of a text/varchar/char column to index or, when no such
+ * column exists, the name of a function found via findFunc().  Once a
+ * function has been named it is applied to the value of every column listed
+ * after it.  The words of all columns are parsed with the current
+ * configuration and stored in the tsvector column of the returned tuple.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid; /* function applied to column values, if any was named */
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           funcoid=findFunc(trigger->tgargs[i]); /* not a column: treat the argument as a function name */
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt); /* makes the pfree test below skip this value */
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt); /* free only a detoasted copy */
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       tsvector *out = palloc(CALCDATASIZE(0,0)); /* no words at all: store a tsvector of size 0 */
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+/*
+ * Descriptor of one lexeme inside a tsvector, packed into 32 bits.
+ * MAXSTRLEN and MAXSTRPOS are the limits implied by the widths of the len
+ * and pos bit fields.
+ */
+typedef struct {
+   uint32
+       haspos:1, /* set when a position list follows the lexeme text */
+       len:11, /* lexeme length, MAX 2Kb */
+       pos:20; /* offset of the lexeme in the string area, MAX 1Mb */
+}  WordEntry;
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+/*
+ * One occurrence of a lexeme: its position and a 2-bit weight, packed into
+ * 16 bits.  LIMITPOS() clamps a position to the largest storable value,
+ * MAXENTRYPOS-1.
+ */
+typedef struct {
+   uint16
+       weight:2, /* printed as A, B, C for 3, 2, 1; 0 prints no letter */
+       pos:14; /* position of the occurrence */
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+/*
+ * The tsvector value: a two-field header followed by the WordEntry array
+ * and then the string area (lexeme text and position lists); see the
+ * ARRPTR/STRPTR macros for the exact layout.
+ */
+typedef struct
+{
+   int4        len; /* total size of the value in bytes, as computed by CALCDATASIZE */
+   int4        size; /* number of WordEntry items */
+   char        data[1]; /* start of the variable-length part */
+}  tsvector;
+
+/*
+ * Layout helpers for a tsvector x: header (DATAHDRSIZE bytes), then
+ * x->size WordEntry items (ARRPTR), then the string area (STRPTR).
+ * For an entry e, the uint16 position count sits right after the
+ * SHORTALIGN'ed lexeme text (_POSDATAPTR), followed by the WordEntryPos
+ * items (POSDATAPTR); POSDATALEN yields 0 when e has no positions.
+ *
+ * All macro arguments are parenthesized so that expressions such as
+ * "cur - data" or "a + b" can be passed safely.
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+/* Working form of an entry while a tsvector is being read from text. */
+typedef struct {
+   WordEntry   entry; /* during parsing, pos refers to the temporary lexeme buffer */
+   WordEntryPos    *pos; /* position array; slot 0 holds the count as uint16, used only when entry.haspos is set */
+}  WordEntryIN;
+/* Scanner state shared between gettoken_tsvector() and its callers. */
+typedef struct
+{
+   char       *prsbuf; /* input text; advanced as it is consumed */
+   char       *word; /* buffer receiving the current lexeme */
+   char       *curpos; /* write position inside word */
+   int4        len; /* allocated size of word */
+   int4        state; /* current scanner state */
+   int4        alen; /* allocated slots in pos; 0 if no position list was read */
+   WordEntryPos    *pos; /* position list of the current lexeme, count in slot 0 */
+   bool        oprisdelim; /* if true, operator characters and ':' end a lexeme */
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>            /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the argument that contains only the
+ * lexemes; all position information is dropped (haspos is cleared for every
+ * entry).
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* space needed for the lexeme text alone */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char"): return a copy of the tsvector in which every
+ * position carries the weight named by the second argument
+ * (a/b/c/d, case-insensitive, mapped to 3/2/1/0).  Any other letter raises
+ * an error.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector   *src = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char        letter = PG_GETARG_CHAR(1);
+   tsvector   *result;
+   WordEntry  *we;
+   int         weight = 0;
+   int         n, k, npos;
+
+   switch(tolower(letter)) {
+       case 'a':
+           weight = 3;
+           break;
+       case 'b':
+           weight = 2;
+           break;
+       case 'c':
+           weight = 1;
+           break;
+       case 'd':
+           weight = 0;
+           break;
+       default:
+           elog(ERROR,"Unknown weight");
+   }
+
+   /* work on a private copy of the whole value */
+   result = (tsvector*)palloc(src->len);
+   memcpy(result, src, src->len);
+
+   we = ARRPTR(result);
+   for (n = result->size; n > 0; n--, we++) {
+       WordEntryPos *wp;
+
+       npos = POSDATALEN(result, we);
+       if ( npos == 0 )
+           continue;
+
+       wp = POSDATAPTR(result, we);
+       for (k = 0; k < npos; k++)
+           wp[k].weight = weight;
+   }
+
+   PG_FREE_IF_COPY(src, 0);
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * Compare two entries that live in (possibly different) string areas ptra
+ * and ptrb: shorter lexemes sort first, equal lengths are decided by
+ * strncmp() of the lexeme bytes.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position list of
+ * destptr (an entry of dest), adding maxpos to each position and clamping
+ * the result with LIMITPOS.  If destptr has no positions yet, its count is
+ * initialised to 0 first; haspos is set once at least one position was
+ * added.
+ *
+ * Copying stops when the source is exhausted, when the destination holds
+ * MAXNUMPOS positions, or when the last stored position already equals
+ * MAXENTRYPOS-1 (the same limits uniquePos() applies).
+ *
+ * Returns the number of positions appended.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+/*
+ * concat(tsvector, tsvector): merge two tsvectors into a new one.
+ *
+ * The entry arrays of both inputs are walked in parallel in compareEntry()
+ * order (this presumably relies on both inputs being sorted that way --
+ * confirm against the producers).  Entries present in only one input are
+ * copied; for a lexeme present in both, the position lists are combined.
+ * Positions coming from the second argument are shifted by the largest
+ * position found in the first argument (see add_pos()).
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   ptr=ARRPTR(in1); /* first pass: find the largest position used in in1 */
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   out=(tsvector*)palloc( in1->len + in2->len ); /* upper bound; trimmed at the end */
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else { /* same lexeme in both inputs */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos); /* count header already copied from in1 */
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   while(i1) { /* entries left over in in1 */
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   while(i2) { /* entries left over in in2 */
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   out->size=ptr-ARRPTR(out); /* actual number of entries after merging */
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data ); /* string area moves up because the entry array shrank */
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- WARNING: destructive uninstall script for tsearch2.
+-- The CASCADE drops below remove all indices, triggers and columns
+-- that use the types defined in tsearch2.sql.
+-- Everything is executed inside a single transaction (BEGIN ... END).
+
+
+-- GiST operator class
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregates
+DROP AGGREGATE stat(tsvector);
+
+-- tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types (CASCADE also drops the objects that depend on them)
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- functions dropped explicitly
+-- NOTE(review): presumably these are the ones not already removed by the
+-- DROP TYPE ... CASCADE statements above -- confirm against tsearch2.sql
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the token
+ * id from deflex.h (1..LASTNUM).  Slot 0 is a placeholder: token ids
+ * start at 1.  Must stay in step with tok_alias[] and deflex.h.
+ */
+const char *lex_descr[]={
+   "",                                     /*  0: unused */
+   "Latin word",                           /*  1: LATWORD */
+   "Non-latin word",                       /*  2: CYRWORD */
+   "Word",                                 /*  3: UWORD */
+   "Email",                                /*  4: EMAIL */
+   "URL",                                  /*  5: FURL */
+   "Host",                                 /*  6: HOST */
+   "Scientific notation",                  /*  7: SCIENTIFIC */
+   "VERSION",                              /*  8: VERSIONNUMBER */
+   "Part of hyphenated word",              /*  9: PARTHYPHENWORD */
+   "Non-latin part of hyphenated word",    /* 10: CYRPARTHYPHENWORD */
+   "Latin part of hyphenated word",        /* 11: LATPARTHYPHENWORD */
+   "Space symbols",                        /* 12: SPACE */
+   "HTML Tag",                             /* 13: TAG */
+   "HTTP head",                            /* 14: HTTP */
+   "Hyphenated word",                      /* 15: HYPHENWORD */
+   "Latin hyphenated word",                /* 16: LATHYPHENWORD */
+   "Non-latin hyphenated word",            /* 17: CYRHYPHENWORD */
+   "URI",                                  /* 18: URI */
+   "File or path name",                    /* 19: FILEPATH */
+   "Decimal notation",                     /* 20: DECIMAL */
+   "Signed integer",                       /* 21: SIGNEDINT */
+   "Unsigned integer",                     /* 22: UNSIGNEDINT */
+   "HTML Entity"                           /* 23: HTMLENTITY */
+};
+
+/*
+ * Short alias of every token type, indexed by the token id from
+ * deflex.h (1..LASTNUM).  Slot 0 is a placeholder: token ids start at 1.
+ * Must stay in step with lex_descr[] and deflex.h.
+ */
+const char *tok_alias[]={
+   "",                 /*  0: unused */
+   "lword",            /*  1: LATWORD */
+   "nlword",           /*  2: CYRWORD */
+   "word",             /*  3: UWORD */
+   "email",            /*  4: EMAIL */
+   "url",              /*  5: FURL */
+   "host",             /*  6: HOST */
+   "sfloat",           /*  7: SCIENTIFIC */
+   "version",          /*  8: VERSIONNUMBER */
+   "part_hword",       /*  9: PARTHYPHENWORD */
+   "nlpart_hword",     /* 10: CYRPARTHYPHENWORD */
+   "lpart_hword",      /* 11: LATPARTHYPHENWORD */
+   "blank",            /* 12: SPACE */
+   "tag",              /* 13: TAG */
+   "http",             /* 14: HTTP */
+   "hword",            /* 15: HYPHENWORD */
+   "lhword",           /* 16: LATHYPHENWORD */
+   "nlhword",          /* 17: CYRHYPHENWORD */
+   "uri",              /* 18: URI */
+   "file",             /* 19: FILEPATH */
+   "float",            /* 20: DECIMAL */
+   "int",              /* 21: SIGNEDINT */
+   "uint",             /* 22: UNSIGNEDINT */
+   "entity"            /* 23: HTMLENTITY */
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+/*
+ * Token type ids returned by the default word parser (wordparser/parser.l)
+ * together with the tables that describe them.
+ */
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Remember: LASTNUM must equal the highest token id defined below, and
+ * lex_descr[] / tok_alias[] in deflex.c must have LASTNUM+1 entries.
+ */
+#define LASTNUM        23
+
+/* token ids; each value is also the index into lex_descr[] and tok_alias[] */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* long descriptions and short aliases, defined in deflex.c */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+/*
+ * Interface of the flex-generated default word parser (parser.l).
+ *
+ * NOTE(review): `token` and `tokenlen` are declared here without `extern`,
+ * so every file including this header gets its own tentative definition.
+ * That links only when the toolchain merges common symbols; making them
+ * `extern` here requires adding a definition of `tokenlen` in parser.l
+ * at the same time.
+ * NOTE(review): start_parse_fh() uses FILE, so <stdio.h> must have been
+ * included before this header.
+ */
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/* text of the token most recently returned by tsearch2_yylex() */
+char      *token;
+/* length of that token, as stored by the scanner actions */
+int            tokenlen;
+/* scan the next token; returns its type id (see deflex.h) */
+int            tsearch2_yylex(void);
+/* begin scanning a memory buffer of the given length */
+void       start_parse_str(char *, int);
+/* begin scanning a file handle; the int limits the bytes read (0 = no limit) */
+void       start_parse_fh(FILE *, int);
+/* release the scanner buffer set up by one of the start_parse_* calls */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/* Avoid exit() on fatal scanner errors */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* postgres allocation function */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to token */
+char *s     = NULL;  /* to return WHOLE hyphenated-word */
+
+YY_BUFFER_STATE buf = NULL; /* buffer to parse; it need for parse from string */
+
+int lrlimit = -1;  /* for limiting read from filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* for limiting read from filehandle */
+
+/*
+ * Redefine flex's YY_INPUT so that reading from a file handle can be
+ * limited to a fixed number of bytes.
+ *
+ * Interactive buffers: read character by character up to and including
+ * the next newline (or EOF / max_size).
+ * Other buffers: lrlimit == 0 reports end of input (YY_NULL);
+ * lrlimit > 0 reads at most lrlimit bytes and decrements lrlimit by the
+ * amount requested; lrlimit < 0 reads up to max_size without limit.
+ *
+ * Beware of the indentation in the last branch: the fread() statement is
+ * NOT part of the `else bytestoread = max_size;` arm, it runs for both
+ * the limited and the unlimited case.
+ *
+ * NOTE(review): the scanner is built with %option never-interactive, so
+ * the interactive branch is presumably never taken -- confirm.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: release the saved copy of a compound token
+ * (if any) and delete the current scanner buffer.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free( s );
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+} 
+
+/*
+ * Start parsing `limit` bytes beginning at `str`.
+ * A buffer left over from a previous parse is released first; the
+ * scanner is then switched to the new buffer and put in its INITIAL state.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL )
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Start parsing from file handle `fh`.
+ * `limit` is the maximum number of bytes to read; 0 means no limit
+ * (stored as lrlimit = -1, see YY_INPUT).  A buffer left over from a
+ * previous parse is released first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL )
+       end_parse();
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser stored in the pg_ts_parser
+ * row whose oid is `id`.
+ *
+ * *prs is zeroed first.  The row is fetched through SPI with a plan that
+ * is prepared once and saved in plan_getparser.  The start, nexttoken,
+ * end and headline functions are resolved with fmgr_info_cxt() in
+ * TopMemoryContext; the lextype function is only stored as an oid.
+ * Reports an error through ts_error(ERROR, ...) when the plan cannot be
+ * prepared, the query fails, or no such parser exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       /* columns 1..5 follow the order of the select list above */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned: print with %u so large oids do not show up negative */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/*
+ * Per-backend cache of parser descriptions, filled on demand by findprs().
+ */
+typedef struct {
+   WParserInfo *last_prs;      /* entry returned by the latest lookup; checked first */
+   int     len;                /* number of entries of list[] in use */
+   int     reallen;            /* number of entries list[] has room for */
+   WParserInfo *list;          /* entries, kept sorted by prs_id (see findprs) */
+   SNMap       name2id_map;    /* parser name -> oid cache used by name2id_prs() */
+} PrsList;
+
+/* the cache itself, initially empty */
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget every cached parser: free the name -> oid map and the entry
+ * array, then return PList to its empty state.
+ */
+void    
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL )
+       free( PList.list );
+
+   memset( &PList, 0, sizeof(PrsList) );
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * Oids are unsigned, so the ids are compared explicitly: the difference
+ * of two Oids converted to int can wrap around and report the wrong
+ * order for ids that are far apart.
+ * Returns <0, 0 or >0 as a's id is less than, equal to or greater than b's.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   return ( ida > idb ) ? 1 : 0;
+}
+
+/*
+ * Return the cached description of parser `id`, loading it with
+ * init_prs() the first time it is requested.
+ *
+ * Lookup order: the entry returned last time, then a binary search of
+ * the sorted cache, and finally a fresh load appended to the cache
+ * (which is grown by doubling, starting at 16 entries) and re-sorted.
+ */
+WParserInfo *
+findprs(Oid id) {
+   /* fast path: same parser as the previous lookup */
+   if ( PList.last_prs != NULL && PList.last_prs->prs_id == id )
+       return PList.last_prs;
+
+   /* parsers loaded earlier */
+   if ( PList.len ) {
+       WParserInfo probe;
+
+       probe.prs_id = id;
+       PList.last_prs = bsearch(&probe, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs )
+           return PList.last_prs;
+   }
+
+   /* not cached yet: make sure there is room for one more entry */
+   if ( PList.reallen == PList.len ) {
+       int newcap = 16;
+       WParserInfo *grown;
+
+       if ( PList.reallen )
+           newcap = 2*PList.reallen;
+       grown = (WParserInfo*)realloc(PList.list, sizeof(WParserInfo)*newcap);
+       if ( grown == NULL )
+           ts_error(ERROR,"No memory");
+       PList.list = grown;
+       PList.reallen = newcap;
+   }
+
+   PList.last_prs = PList.list + PList.len;
+   init_prs(id, PList.last_prs);
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+
+   /* qsort may have moved the new entry, so look it up again */
+   return findprs(id);
+}
+
+/* saved SPI plan for the name -> oid query issued by name2id_prs() */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * The name -> oid cache in PList is consulted first; on a miss the oid
+ * is read from pg_ts_parser through SPI and added to the cache.
+ * Reports an error through ts_error(ERROR, ...) when the plan cannot be
+ * prepared, the query fails, or no parser has that name.
+ */
+Oid
+name2id_prs(text *name) {
+   Oid     argtypes[1] = { TEXTOID };
+   Datum   argvals[1] = { PointerGetDatum(name) };
+   bool    isnull;
+   int     rc;
+   Oid     id;
+
+   id = findSNMap_t( &(PList.name2id_map), name );
+   if ( id != InvalidOid )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, argvals, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed > 0 )
+       id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* cross-call state of the token_type* set-returning functions */
+typedef struct {
+   int     cur;            /* index of the next list[] entry to return */
+   LexDescr    *list;      /* descriptors from the parser's lextype function; the entry with lexid 0 ends the list */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type* functions.
+ *
+ * Looks up parser `prsid`, calls its lextype function to obtain the list
+ * of token descriptors and stores it, together with the tuple slot and
+ * attribute metadata of "tokentype", in funcctx.  Everything that must
+ * survive between calls is allocated in the multi-call memory context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo     *parser = findprs(prsid);
+   MemoryContext   saved;
+   TypeStorage     *state;
+   TupleDesc       desc;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   state->cur = 0;
+   state->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) )
+   );
+   funcctx->user_fctx = (void*)state;
+
+   desc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+/*
+ * Per-call worker shared by the token_type* functions.
+ *
+ * Returns the next token descriptor as a (id, alias, description) tuple
+ * and frees the alias and description strings of that entry.  When the
+ * list is exhausted (or missing) the stored state is freed and
+ * (Datum) 0 is returned to signal the end of the set.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *state = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char        *fields[3];
+   char        idbuf[16];
+   HeapTuple   tup;
+   Datum       res;
+
+   /* nothing left: release the state and report the end */
+   if ( state->list == NULL || state->list[state->cur].lexid == 0 ) {
+       if ( state->list ) pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &state->list[state->cur];
+   sprintf(idbuf, "%d", entry->lexid);
+   fields[0] = idbuf;
+   fields[1] = entry->alias;
+   fields[2] = entry->descr;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   res = TupleGetDatum(funcctx->slot, tup);
+
+   /* the strings were copied into the tuple, so they can go now */
+   pfree(fields[1]);
+   pfree(fields[2]);
+   state->cur++;
+   return res;
+}
+
+/*
+ * token_type(parser oid): set-returning function listing the token
+ * types of the parser with the given oid, one tuple per call.
+ */
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       Oid prsid;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prsid = PG_GETARG_OID(0);
+       setup_firstcall(funcctx, prsid);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   result = process_call(funcctx);
+   if ( result == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+/*
+ * token_type(parser name): same as token_type(), with the parser
+ * selected by name instead of oid.
+ */
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+       Oid prsid;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( name );
+       setup_firstcall(funcctx, prsid);
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   result = process_call(funcctx);
+   if ( result == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+/*
+ * token_type(): same as token_type(), for the current parser.  If no
+ * current parser has been chosen yet, the one named "default" becomes
+ * current.
+ */
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       setup_firstcall(funcctx, current_parser_id);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   result = process_call(funcctx);
+   if ( result == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+
+/*
+ * set_curprs(parser oid): make the given parser the current one.
+ * The parser is looked up first, so an unknown oid raises an error
+ * before the current parser is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        Oid prsid = PG_GETARG_OID(0);
+
+        findprs(prsid);
+        current_parser_id = prsid;
+        PG_RETURN_VOID();
+}
+
+/*
+ * set_curprs(parser name): resolve the name to an oid and delegate to
+ * set_curprs().
+ */
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *name = PG_GETARG_TEXT_P(0);
+        Oid prsid = name2id_prs(name);
+
+        DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+/* one token produced by a parser */
+typedef struct {
+   int type;           /* token type id returned by the parser's getlexeme function */
+   char    *lexem;     /* NUL-terminated copy of the token text */
+} LexemEntry;
+
+/* cross-call state of the parse* set-returning functions */
+typedef struct {
+   int cur;            /* index of the next list[] entry to return */
+   int len;            /* allocated size while collecting, number of tokens afterwards */
+   LexemEntry  *list;  /* collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse* functions.
+ *
+ * Runs parser `prsid` over the whole of `txt` and stores every token
+ * (type id plus a NUL-terminated copy of its text) in a PrsStorage kept
+ * in funcctx->user_fctx; prs_process_call() then hands the tokens out one
+ * per call.  Also prepares the tuple slot and attribute metadata of
+ * "tokenout".  Everything that must survive between calls is allocated
+ * in the multi-call memory context.
+ *
+ * prsid is an Oid, like in setup_firstcall(): every caller passes an Oid
+ * and findprs() expects one.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text payload (varlena header excluded) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* collect tokens until the parser returns type 0 */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* grow the token array by doubling when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the number of tokens and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the parse* functions.
+ *
+ * Returns the next collected token as a (type id, text) tuple and frees
+ * the stored copy of its text.  When all tokens have been returned the
+ * stored state is freed and (Datum) 0 is returned to signal the end of
+ * the set.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *state = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char        *fields[2];
+   char        typebuf[16];
+   HeapTuple   tup;
+   Datum       res;
+
+   /* nothing left: release the state and report the end */
+   if ( !( state->cur < state->len ) ) {
+       if ( state->list ) pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &state->list[state->cur];
+   sprintf(typebuf, "%d", entry->type);
+   fields[0] = typebuf;
+   fields[1] = entry->lexem;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   res = TupleGetDatum(funcctx->slot, tup);
+
+   /* the text was copied into the tuple, so it can go now */
+   pfree(fields[1]);
+   state->cur++;
+   return res;
+}
+
+           
+
+/*
+ * parse(parser oid, text): set-returning function that splits the text
+ * with the given parser and returns one (type id, token) tuple per call.
+ */
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   result = prs_process_call(funcctx);
+   if ( result == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+/*
+ * parse(parser name, text): same as parse(), with the parser selected
+ * by name instead of oid.
+ */
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+       text *txt = PG_GETARG_TEXT_P(1);
+       Oid prsid;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( name );
+       prs_setup_firstcall(funcctx, prsid, txt);
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   result = prs_process_call(funcctx);
+   if ( result == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+
+/*
+ * parse(text): same as parse(), using the current parser.  If no current
+ * parser has been chosen yet, the one named "default" becomes current.
+ */
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       prs_setup_firstcall(funcctx, current_parser_id, txt);
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   result = prs_process_call(funcctx);
+   if ( result == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+/*
+ * headline(cfg oid, text, query [, options text])
+ *
+ * Parses the text with hlparsetext() under configuration `cfg`, lets the
+ * headline function of that configuration's parser process the parsed
+ * words, and returns the text produced by genhl().
+ * The options argument is optional: it is treated as absent when fewer
+ * than four arguments are passed or when its pointer is NULL.
+ */
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   /* start with an empty word list with room for 32 words */
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   /* the parser's headline function works on prs in place; its result is not used */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /* NOTE(review): startsel/stopsel are zeroed by the memset above and not
+    * set in this function; presumably the headline function allocates
+    * them -- confirm, since pfree(NULL) is not allowed */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+/*
+ * headline(cfg name, text, query [, options text]): resolve the
+ * configuration name to an oid and delegate to headline().  A missing
+ * options argument is passed on as a NULL pointer.
+ */
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text    *cfgname = PG_GETARG_TEXT_P(0);
+   Datum   options;
+   Datum   res;
+
+   if ( PG_NARGS() > 3 )
+       options = PG_GETARG_DATUM(3);
+   else
+       options = PointerGetDatum(NULL);
+
+   res = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum( name2id_cfg( cfgname ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       options
+   );
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * headline(text, query [, options text]): delegate to headline() with
+ * the current configuration.  A missing options argument is passed on
+ * as a NULL pointer.
+ */
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum   options;
+   Datum   res;
+
+   if ( PG_NARGS() > 2 )
+       options = PG_GETARG_DATUM(2);
+   else
+       options = PointerGetDatum(NULL);
+
+   res = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       options
+   );
+
+   PG_RETURN_DATUM(res);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+/*
+ * Parser registry interface: cached descriptions of the parsers listed
+ * in pg_ts_parser and the descriptor type returned by their lextype
+ * functions.
+ */
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* cached description of one parser (filled by init_prs) */
+typedef struct {
+   Oid prs_id;                 /* oid of the pg_ts_parser row */
+   FmgrInfo start_info;        /* prs_start function */
+   FmgrInfo getlexeme_info;    /* prs_nexttoken function */
+   FmgrInfo end_info;          /* prs_end function */
+   FmgrInfo headline_info;     /* prs_headline function */
+   Oid lextype;                /* oid of the prs_lextype function (called by oid) */
+   void *prs;                  /* value returned by the start function; passed to getlexeme/end */
+} WParserInfo;
+
+/* load the description of parser `id` from pg_ts_parser into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached description of parser `id`, loading it if needed */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid; errors out if there is none */
+Oid name2id_prs(text *name);
+/* drop all cached parser descriptions */
+void   reset_prs(void);
+
+
+/* one token type as reported by a parser's lextype function */
+typedef struct {
+   int lexid;          /* token type id; 0 terminates a list of descriptors */
+   char    *alias;     /* short name of the token type */
+   char    *descr;     /* human-readable description */
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * Build the table of lexeme types known to the default parser.
+ * Returns a palloc'd array of LASTNUM+1 LexDescr entries: entries
+ * 0..LASTNUM-1 describe types 1..LASTNUM (alias and description are
+ * pstrdup'd from tok_alias[] / lex_descr[]), and the final entry has
+ * lexid 0 as an end marker (its alias/descr are left unset).
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *table=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   LexDescr *slot=table;
+   int lexid;
+
+   for(lexid=1;lexid<=LASTNUM;lexid++,slot++) {
+       slot->lexid = lexid;
+       slot->alias = pstrdup(tok_alias[lexid]);
+       slot->descr = pstrdup(lex_descr[lexid]);
+   }
+
+   /* slot now addresses table[LASTNUM]: write the terminator */
+   slot->lexid=0;
+
+   PG_RETURN_POINTER(table);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+
+/*
+ * Start parsing a buffer: argument 0 is a pointer to the text, argument 1
+ * its length.  Both are handed to start_parse_str(); a NULL pointer is
+ * returned as the (unused) parser state.
+ */
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char *str=(char*)PG_GETARG_POINTER(0);
+   int len=PG_GETARG_INT32(1);
+
+   start_parse_str( str, len );
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+
+/*
+ * Fetch the next lexeme from the lexer.
+ * Argument 0 (parser state) is not used.  Argument 1 is a char** that
+ * receives the global `token`, argument 2 an int* that receives the global
+ * `tokenlen`.  The value returned by tsearch2_yylex() is the result.
+ */
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tokptr=(char**)PG_GETARG_POINTER(1);
+   int *lenptr=(int*)PG_GETARG_POINTER(2);
+   int  lextype;
+
+   /* run the lexer first: it is what updates token/tokenlen */
+   lextype=tsearch2_yylex();
+
+   *tokptr = token;
+   *lenptr = tokenlen;
+   PG_RETURN_INT32(lextype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+/*
+ * Finish parsing: calls end_parse() and returns void.
+ * Argument 0 (parser state) is not used by this parser.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse();
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class predicates used by the headline code.  The arguments are
+ * numeric lexeme types stored in HLWORD.type.
+ * NOTE(review): the meaning of each number is defined outside this chunk
+ * (presumably wordparser/deflex.h) - confirm before changing any id.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* types that prsd_headline() marks with replace=1 */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* types that are not counted as words when measuring a headline */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* types on which a headline should preferably not end */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* Window of headline words handed to TS_execute() via checkcondition_HL */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * TS_execute() callback: true when some word in the hlCheck window
+ * (checkval) refers to the query item val.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window=(hlCheck*)checkval;
+   int idx=0;
+
+   while( idx<window->len ) {
+       if ( window->words[idx].item==val )
+           return true;
+       idx++;
+   }
+   return false;
+}
+
+
+/*
+ * Find the next span of words [*p, *q] (indices into prs->words) over
+ * which the query evaluates to true.
+ *
+ * On entry *p is the index to start searching from.  On success the
+ * function returns true with *p/*q set to the span bounds; otherwise it
+ * returns false (and *p/*q are left modified).
+ *
+ * The loop bounds `j<query->size` and `i<prs->curwords` were lost in this
+ * copy of the file (they read `jsize` / `icurwords`) and are restored here.
+ *
+ * NOTE(review): *q==0 doubles as the "nothing found" marker, so a match
+ * located only at word index 0 is reported as no cover.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* pass 1: *q = largest "first occurrence at or after pos" over all VAL items */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q==0 )
+       return false;
+
+   /* pass 2: *p = smallest "last occurrence in [pos, *q]" over all VAL items */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       /* verify the candidate span against the whole query */
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* not a real cover: retry starting one word further */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+
+/*
+ * Choose the fragment of the parsed text to show as a headline.
+ *
+ * Arguments: 0 - HLPRSTEXT* with the parsed words (modified in place and
+ * returned), 1 - options text or NULL, 2 - the query.
+ *
+ * Recognised options (case-insensitive keys): MaxWords, MinWords,
+ * ShortWord, StartSel, StopSel.  Errors are raised unless
+ * 0 < MinWords < MaxWords and ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is extended or shortened toward the
+ * MinWords..MaxWords range and the best one is kept; if there is no cover
+ * the first MinWords words are used.  Words of the chosen fragment get
+ * in=1, plus selected/skip/replace as applicable.
+ *
+ * Three statements were truncated in this copy of the file (the
+ * `poslen<bestlen` test and the loop headers bounded by prs->curwords);
+ * they are restored here.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* from opt + start and and tag */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       /* the map is terminated by an entry with a NULL key */
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already finded, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Keep this candidate if it is the first one, if it has more query
+        * words and a good end, or if it has a good end while the current
+        * best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: fall back to the first min_words words */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen fragment */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* unset selection markers default to empty strings */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("");
+
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+           doc[cur].item=&(item[i]);
+           doc[cur].pos=post[j].pos;
+           cur++;
+       }
+   }
+
+   *doclen=cur;
+   
+   if ( cur>0 ) {
+       if ( cur>1 ) 
+           qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+       return doc;
+   }
+   
+   pfree(doc);
+   return NULL;
+}
+
+
+/*
+ * Cover-density rank.
+ *
+ * Arguments: 0 - K (values <= 0 select the default 4), 1 - tsvector,
+ * 2 - query, 3 (optional) - normalization method.
+ *
+ * Each cover [p,q] found by Cover() contributes 1.0 when its width
+ * q-p+1 does not exceed K and K/width otherwise.  The sum is then
+ * normalized:
+ *   0 - no normalization
+ *   1 - divide by log(document length)
+ *   2 - divide by document length
+ * Any other method raises an error.  Returns 0.0 when get_docrep() finds
+ * nothing.
+ *
+ * The division is skipped when it would be by zero (length 1 for method 1,
+ * since log(1) is 0; length 0 for method 2) or, for method 1, by a negative
+ * value, instead of returning an infinite or sign-flipped rank.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+   int K = PG_GETARG_INT32(0);
+   tsvector       *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+   int method=DEF_NORM_METHOD;
+   DocRepresentation   *doc;
+   float   res=0.0;
+   float   doclen;
+   int p=0,q=0,len,cur;
+
+   doc = get_docrep(txt, query, &len);
+   if ( !doc ) {
+       PG_FREE_IF_COPY(txt, 1);
+       PG_FREE_IF_COPY(query, 2);
+       PG_RETURN_FLOAT4(0.0);
+   }
+
+   cur=0;
+   if (K<=0)
+       K=4;    
+   while( Cover(doc, len, query, &cur, &p, &q) ) 
+       res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+   if ( PG_NARGS() == 4 )
+       method=PG_GETARG_INT32(3);
+
+   switch(method) {
+       case 0: break;
+       case 1:
+           doclen=(float)cnt_length(txt);
+           if ( doclen>1.0 )   /* log(1)==0, log(0) is -infinity */
+               res /= log(doclen);
+           break;
+       case 2:
+           doclen=(float)cnt_length(txt);
+           if ( doclen>0.0 )
+               res /= doclen;
+           break;
+       default:
+           elog(ERROR,"Unknown normalization method: %d",method);
+   }
+
+   pfree(doc);
+   PG_FREE_IF_COPY(txt, 1);
+   PG_FREE_IF_COPY(query, 2);
+
+   PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd() with the default K: passes -1 as K (which rank_cd replaces by
+ * its default) and forwards the tsvector and query.  The normalization
+ * method is argument 2 when exactly three arguments were given, otherwise
+ * DEF_NORM_METHOD.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+   Datum method;
+   Datum result;
+
+   if ( PG_NARGS() == 3 )
+       method=PG_GETARG_DATUM(2);
+   else
+       method=Int32GetDatum(DEF_NORM_METHOD);
+
+   result=DirectFunctionCall4(
+       rank_cd,
+       Int32GetDatum(-1),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       method
+   );
+
+   PG_RETURN_DATUM(result);
+}
+
+/**************debug*************/
+
+typedef struct {
+   char    *w;
+   int2    len;
+   int2    pos;
+   int2    start;
+   int2    finish;
+} DocWord;
+
+/*
+ * qsort() comparator ordering DocWord entries by ascending pos.
+ * Equal positions compare as 0: a comparator that reports "greater" for
+ * both (a,b) and (b,a) violates the ordering contract qsort() relies on.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+   const DocWord *wa=(const DocWord *) a;
+   const DocWord *wb=(const DocWord *) b;
+
+   if ( wa->pos == wb->pos )
+       return 0;
+   return ( wa->pos > wb->pos ) ? 1 : -1;
+}
+
+
+/*
+ * Debug helper: returns the document's words in position order as text,
+ * with each cover of the query marked as "{N " before its first word and
+ * "}N " after its last word (N counts covers from 1).
+ *
+ * Arguments: 0 - tsvector (every entry must carry position info, otherwise
+ * an error is raised), 1 - query.  Returns an empty text when get_docrep()
+ * finds nothing.
+ *
+ * Several loop headers were truncated in this copy of the file (bounds
+ * `i<txt->size`, `j<POSDATALEN(...)`, `dwptr-dw<dlen`); they are restored
+ * here, and the bounds test is evaluated before dwptr is dereferenced.
+ */
+Datum 
+get_covers(PG_FUNCTION_ARGS) {
+   tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry       *pptr=ARRPTR(txt);
+   int i,dlen=0,j,cur=0,len=0,rlen;
+   DocWord *dw,*dwptr;
+   text    *out;
+   char *cptr;
+   DocRepresentation *doc;
+   int pos=0,p,q,olddwpos=0;
+   int ncover=1;
+
+   doc = get_docrep(txt, query, &rlen);
+
+   if ( !doc ) {
+       out=palloc(VARHDRSZ);
+       VARATT_SIZEP(out) = VARHDRSZ;
+       PG_FREE_IF_COPY(txt,0);
+       PG_FREE_IF_COPY(query,1);
+       PG_RETURN_POINTER(out);
+   }
+
+   /* count all word positions in the document */
+   for(i=0;i<txt->size;i++) {
+       if (!pptr[i].haspos)
+           elog(ERROR,"No pos info");
+       dlen += POSDATALEN(txt,&(pptr[i]));
+   }
+
+   dwptr=dw=palloc(sizeof(DocWord)*dlen);
+   memset(dw,0,sizeof(DocWord)*dlen);
+
+   /* one DocWord per (word, position); len accumulates the output size */
+   for(i=0;i<txt->size;i++) {
+       WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
+       for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   /* mark the first and last word of every cover */
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       while(dwptr-dw<dlen && dwptr->pos < p)
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr-dw<dlen && dwptr->pos < q+1)
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   /* the actual length may be smaller than the reserved len */
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/*
+ * Node of the pointer-based ("normal") query tree built by maketree().
+ * valnode points into the caller's flat ITEM array; it is not owned by
+ * the node.
+ */
+typedef struct NODE
+{
+   struct NODE *left;      /* second operand; NULL for values and for '!' */
+   struct NODE *right;     /* first operand (the item that follows); NULL for values */
+   ITEM       *valnode;    /* the ITEM this node represents */
+}  NODE;
+
+/*
+ * Build a pointer-based tree from the flat (prefix) form of a query.
+ * For an operator the right child starts at the next item and, unless the
+ * operator is '!', the left child starts in->left items further on.
+ * Every node is palloc'd; valnode points into the input array.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *result = (NODE *) palloc(sizeof(NODE));
+
+   result->valnode = in;
+   result->left = NULL;
+   result->right = NULL;
+
+   /* anything that is not an operator is a leaf */
+   if (in->type != OPR)
+       return result;
+
+   result->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       result->left = maketree(in + in->left);
+
+   return result;
+}
+
+/* Growable output buffer used while flattening a tree */
+typedef struct
+{
+   ITEM       *ptr;
+   int4        len;
+   int4        cur;
+}  PLAINTREE;
+
+/*
+ * Append node (and, recursively, its children) to state in flat prefix
+ * order and pfree the node.  The buffer doubles when full.  For '!' the
+ * item's left offset is set to 1; for binary operators it is set to the
+ * distance to the start of the left operand, which is known only after the
+ * right operand has been emitted.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   int4        slot;
+
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+
+   /* claim the next slot for this node's item */
+   slot = state->cur++;
+   memcpy((void *) &(state->ptr[slot]), (void *) node->valnode, sizeof(ITEM));
+
+   if (node->valnode->type != VAL)
+   {
+       if (node->valnode->val == (int4) '!')
+       {
+           state->ptr[slot].left = 1;
+           plainnode(state, node->right);
+       }
+       else
+       {
+           plainnode(state, node->right);
+           /* index through state->ptr again: the recursion may have moved it */
+           state->ptr[slot].left = state->cur - slot;
+           plainnode(state, node->left);
+       }
+   }
+
+   pfree(node);
+}
+
+/*
+ * Flatten a pointer-based tree back into a palloc'd ITEM array (prefix
+ * form).  The tree nodes are released by plainnode().  *len receives the
+ * number of items written.  Returns NULL with *len == 0 when root is NULL
+ * or its item is neither VAL nor OPR.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   pl;
+
+   pl.ptr = NULL;
+   pl.cur = 0;
+   pl.len = 16;
+
+   if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
+       plainnode(&pl, root);
+   }
+
+   *len = pl.cur;
+   return pl.ptr;
+}
+
+/*
+ * Release a whole subtree (post-order).  A NULL node is a no-op, so the
+ * children can be passed without checking them first.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Remove '!' operators from the tree (used for the index-search view of a
+ * query, where '!' is treated as always true).
+ *
+ * Returns the cleaned subtree, or NULL when the whole subtree was dropped:
+ *   value  - kept as is
+ *   '!'    - dropped together with its operand
+ *   '|'    - dropped if either operand is dropped
+ *   other  - (i.e. '&') replaced by the surviving operand, dropped when
+ *            both operands are dropped
+ * Dropped nodes are freed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   NODE       *survivor;
+
+   if (node->valnode->type == VAL)
+       return node;
+
+   switch (node->valnode->val)
+   {
+       case (int4) '!':
+           freetree(node);
+           return NULL;
+
+       case (int4) '|':
+           /* the right side is cleaned only if the left side survived */
+           node->left = clean_NOT_intree(node->left);
+           if (node->left != NULL)
+               node->right = clean_NOT_intree(node->right);
+           if (node->left == NULL || node->right == NULL)
+           {
+               freetree(node);
+               return NULL;
+           }
+           return node;
+
+       default:
+           node->left = clean_NOT_intree(node->left);
+           node->right = clean_NOT_intree(node->right);
+           if (node->left != NULL && node->right != NULL)
+               return node;
+
+           /* at most one operand is left; it (or NULL) replaces this node */
+           survivor = (node->left != NULL) ? node->left : node->right;
+           pfree(node);
+           return survivor;
+   }
+}
+
+/*
+ * Return a flat copy of the query at ptr with every '!' sub-expression
+ * removed (see clean_NOT_intree).  *len receives the item count; the
+ * result is NULL with *len == 0 when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *cleaned;
+
+   cleaned = clean_NOT_intree(maketree(ptr));
+   return plaintree(cleaned, len);
+}
+
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Clean query tree from values which is always in
+ * text (stopword)
+ *
+ * Removes VALTRUE items and simplifies the operators above them.
+ * Returns the simplified subtree.  When the subtree reduces to a constant,
+ * NULL is returned and *result is set to V_TRUE or V_FALSE; *result is
+ * left untouched when a non-NULL subtree is returned.  Removed nodes are
+ * freed here.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       /* leaf node: nothing below it to free */
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* operand became a constant: negate it */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           /* TRUE | x is TRUE: drop whatever is left of the other side */
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           /* FALSE | x is x: the surviving operand replaces this node */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       /* any other operator is handled as AND */
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           /* FALSE & x is FALSE */
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           /* TRUE & x is x: the surviving operand replaces this node */
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   /* reached only for '!' whose operand survived */
+   return node;
+}
+
+/*
+ * Return a flat copy of the query at ptr with VALTRUE items removed (see
+ * clean_fakeval_intree).  When the whole query collapses to a constant a
+ * NOTICE is emitted, *len is set to 0 and NULL is returned; otherwise
+ * *len receives the item count of the returned array.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        result = V_UNKNOWN;
+   NODE       *resroot;
+
+   resroot = clean_fakeval_intree(maketree(ptr), &result);
+
+   /* a constant result comes with a NULL tree: nothing to flatten */
+   if (result == V_UNKNOWN)
+       return plaintree(resroot, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/*
+ * Query-tree rewrite routines (rewrite.c).  This header does not declare
+ * ITEM or int4 itself: include the headers that define them first
+ * (rewrite.c includes "query.h" before this file).
+ */
+
+/* copy of the query with '!' sub-expressions removed; *len = item count */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+/* copy of the query with VALTRUE items removed; NULL and *len = 0 if nothing is left */
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/* qsort()/bsearch() comparator: orders SNMapEntry items by key (strcmp) */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   SNMapEntry *lhs=(SNMapEntry*)a;
+   SNMapEntry *rhs=(SNMapEntry*)b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add (key, value) to the map.  The key is copied with strdup(); the list
+ * lives in malloc'd memory, starts at 16 slots and doubles when full.  The
+ * list is re-sorted after every insertion so that findSNMap() can use
+ * bsearch().  Raises ERROR "No memory" on allocation failure.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if (map->len>=map->reallen) {
+       int newlen = (map->reallen) ? 2*map->reallen : 16;
+       SNMapEntry *grown=(SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newlen);
+
+       if ( !grown )
+           elog(ERROR, "No memory");
+       map->list=grown;
+       map->reallen=newlen;
+   }
+
+   slot=&map->list[ map->len ];
+   slot->key = strdup(key);
+   if ( ! slot->key )
+       elog(ERROR, "No memory");
+   slot->value=value;
+
+   map->len++;
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * addSNMap() for a text key: the key is converted with text2char() and
+ * the temporary C string is released afterwards (addSNMap keeps its own
+ * copy).
+ */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *cstr;
+
+   cstr=text2char( key );
+   addSNMap(map, cstr, value);
+   pfree(cstr);
+}
+
+/*
+ * Look key up in the map (binary search over the sorted list).
+ * Returns the stored value, or 0 when the map is empty or the key is
+ * absent.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if ( map->len==0 || !map->list )
+       return 0;   
+
+   probe.key=key;
+   probe.value=0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( !hit )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * findSNMap() for a text key: the key is converted with text2char() and
+ * the temporary C string is released before returning.  Returns 0 when the
+ * key is absent.  The result is kept in an Oid (not an int) so that the
+ * value is not passed through a signed type on its way out.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release every key and the list itself (all malloc'd), then zero the
+ * whole SNMap so that it can be reused.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       SNMapEntry *entry;
+       int remaining;
+
+       for(entry=map->list, remaining=map->len; remaining; entry++, remaining--) {
+           if ( entry->key )
+               free(entry->key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One map entry: a C-string key and the Oid stored for it */
+typedef struct {
+   char    *key;   /* copy of the key made by addSNMap (strdup) */
+   Oid value;
+} SNMapEntry;
+
+/*
+ * String -> Oid map kept as a sorted array.  A zero-filled SNMap is a
+ * valid empty map (freeSNMap leaves it in that state).
+ */
+typedef struct {
+   int len;            /* number of entries in use */
+   int reallen;        /* number of entries allocated */
+   SNMapEntry  *list;  /* entries, sorted by key */
+} SNMap;
+
+/* add an entry; raises ERROR when out of memory */
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+/* look a key up; 0 means "not found" */
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+/* release all entries and reset the map to empty */
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/* Create a stemmer environment with S_size string slots, I_size integer
+   slots and B_size boolean slots (any of them may be 0).  Everything is
+   zero-initialised by calloc.
+
+   Returns NULL when one of the calloc calls fails; whatever had been
+   allocated up to that point is released with SN_close_env.  (The original
+   dereferenced the calloc results without checking them.)  The results of
+   create_s() are stored unchecked, as before. */
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto fail;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto fail;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto fail;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+fail:
+    /* the *_size fields are set only after a successful allocation, so
+       SN_close_env releases exactly what exists */
+    SN_close_env(z);
+    return NULL;
+}
+
+/* Release an environment created by SN_create_env: the string slots, the
+   integer and boolean arrays, the current string and the struct itself.
+   A NULL environment is accepted and ignored. */
+
+extern void SN_close_env(struct SN_env * z)
+{
+    if (z == NULL) return;
+    if (z->S_size)
+    {
+        {   int i;
+            for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
+        }
+        free(z->S);
+    }
+    if (z->I_size) free(z->I);
+    if (z->B_size) free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+}
+
+/* Make the size symbols at s the current string of z: the existing range
+   0..z->l is replaced through replace_s and the cursor is reset to 0. */
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    const int old_limit = z->l;
+
+    replace_s(z, 0, old_limit, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/* Stemmer environment.  Created by SN_create_env, released by
+   SN_close_env.
+   NOTE(review): the roles of c/a/l/lb/bra/ket are not visible in this
+   header; api.c only shows that SN_set_current replaces the range 0..l of
+   p and resets c to 0 (presumably cursor and limit - confirm). */
+struct SN_env {
+    symbol * p;                             /* current string */
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;     /* element counts of S, I and B */
+    symbol * * S;                           /* S_size strings */
+    int * I;                                /* I_size integers */
+    symbol * B;                             /* B_size symbols */
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+/* load size symbols from s as the string to work on */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+/* Forward declarations: the public entry point plus the file-local routines it is built from. */
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+/* Environment constructor / destructor for this stemmer (also declared in english_stem.h). */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+/* a_0: one-entry table ("gener") searched with find_among() in r_mark_regions.
+ * Entry layout is { s_size, s, substring_i, result, function } (struct among, header.h). */
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+/* a_1: endings searched with find_among_b() in r_Step_1a; results 1..3 select
+ * the switch case there, -1 entries match without triggering any case. */
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+/* a_2: second lookup of r_Step_1b (find_among_b after the ending was deleted).
+ * Entry 0 is the empty string (s_size 0, no text) with result 3; there is
+ * consequently no s_2_0 array. */
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+/* a_3: first lookup of r_Step_1b (find_among_b); result 1 marks the "eed"
+ * forms, result 2 the "ed"/"ing" forms. */
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+/* a_4: endings searched with find_among_b() in r_Step_2; results 1..16 select
+ * the replacement applied by that routine's switch. */
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+/* a_5: endings searched with find_among_b() in r_Step_3; results 1..6 select
+ * the action taken by that routine's switch. */
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+/* a_6: endings searched with find_among_b() in r_Step_4; every entry has
+ * result 1 except "ion" (result 2), which gets an extra test there. */
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+/* a_7: the two final letters examined by r_Step_5 (find_among_b): 'e' gives result 1, 'l' gives result 2. */
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+/* a_8: word list searched with find_among_b() in r_exception2; all results
+ * are -1 because that routine only tests whether a match occurred. */
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+/* a_9: whole-word exceptions searched with find_among() in r_exception1.
+ * Results 1..11 pick a replacement there; entries with result -1 match but
+ * leave the word as it is. */
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+/* g_v: grouping passed to the (in|out)_grouping[_b] helpers with bounds 97..121
+ * ('a'..'y'); presumably a bitmap of the vowels - TODO confirm against utilities.c */
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+/* g_v_WXY: grouping used by r_shortv with bounds 89..121 ('Y'..'y') */
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+/* g_valid_LI: grouping used by r_Step_2 (case 16) with bounds 99..116 ('c'..'t') */
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+/* s_0 .. s_48: literal strings passed, with an explicit length, to eq_s, eq_s_b, slice_from_s and insert_s below */
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+/* Prelude pass (Snowball source lines 24-26, per the generated comments).
+ * Clears the Y_found flag z->B[0], then marks certain 'y' symbols by
+ * rewriting them to 'Y' with slice_from_s() and sets the flag when it does:
+ *   - once at the entry cursor, when eq_s('y') and then in_grouping(g_v)
+ *     both succeed;
+ *   - repeatedly, scanning forward, wherever in_grouping(g_v) and then
+ *     eq_s('y') both succeed.
+ * The entry cursor z->c is restored after each part.  Always returns 1.
+ * (The helpers' own semantics are defined outside this chunk.) */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c; /* a "do" always restores the cursor */
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c; /* shadows the outer c: cursor at the start of this repetition */
+            while(1) { /* goto, line 26 */
+                int c = z->c; /* shadows again: candidate position being probed */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2; /* reached the limit: stop repeating */
+                z->c++; /* otherwise retry one symbol further on */
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1: /* label emitted by the generator; no goto targets it */
+        z->c = c;
+    }
+    return 1;
+}
+/* Compute the two region marks used by r_R1 and r_R2.
+ * z->I[0] (p1) and z->I[1] (p2) both start out as z->l.  p1 becomes the
+ * cursor position reached either by matching table a_0 or by the first pair
+ * of "gopast" scans (in_grouping, then out_grouping, over g_v); p2 becomes
+ * the position reached by a second such pair of scans.  If a scan hits the
+ * limit z->l first, the remaining marks keep the value z->l.  The entry
+ * cursor is restored; the routine always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c; /* second alternative starts from the same position */
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+/* Backward test used by r_Step_1b and r_Step_5.  Returns 1 when, reading
+ * backwards from the cursor, either
+ *   out_grouping_b(g_v_WXY), in_grouping_b(g_v), out_grouping_b(g_v)
+ * all succeed in that order, or (retried from the entry position)
+ *   out_grouping_b(g_v), in_grouping_b(g_v)
+ * succeed and the cursor has then reached the backward limit z->lb.
+ * Returns 0 otherwise.  The cursor is left wherever the tests moved it. */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m; /* rewind to the entry position (kept as a distance from z->l) */
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* Region test R1: 1 when the cursor is at or beyond mark p1 (z->I[0]), else 0. */
+static int r_R1(struct SN_env * z) {
+    return (z->I[0] <= z->c) ? 1 : 0;
+}
+
+/* Region test R2: 1 when the cursor is at or beyond mark p2 (z->I[1]), else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+/* Step 1a (Snowball source lines 54-58).  Looks up the ending in table a_1
+ * (backwards) and acts on the table result:
+ *   1  replace the slice with "ss";
+ *   2  replace it with "ie" when stepping back one symbol lands exactly on
+ *      the backward limit, otherwise with "i";
+ *   3  step back one symbol, scan backwards until in_grouping_b(g_v)
+ *      succeeds, then delete the slice.
+ * Returns 0 when nothing matched or a scan ran into z->lb, 1 otherwise
+ * (including the table entries whose result is -1, which change nothing). */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0; /* not reachable: 0 was already rejected above */
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+/* Step 1b (Snowball source lines 64-76).  Looks up the ending in table a_3:
+ *   result 1  requires r_R1 and replaces the slice with "ee";
+ *   result 2  requires that a backward scan finds a symbol accepted by
+ *             in_grouping_b(g_v), deletes the slice, then consults table
+ *             a_2 on what is left:
+ *               1  insert "e" at the cursor;
+ *               2  delete the single symbol before the cursor;
+ *               3  when the cursor is at mark p1 and r_shortv succeeds,
+ *                  insert "e".
+ * Returns 0 as soon as any lookup or test fails, 1 otherwise. */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0; /* not reachable: 0 was already rejected above */
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test; /* a "test" puts the cursor back afterwards */
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0; /* not reachable: 0 was already rejected above */
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c; /* keep the cursor where it was before the insertion */
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+/* Step 1c (Snowball source lines 83-85).  When the symbol before the cursor
+ * is 'y' or 'Y', the symbol before that is accepted by out_grouping_b(g_v),
+ * and the cursor has not thereby reached the backward limit z->lb, the
+ * 'y'/'Y' slice is replaced with "i" and 1 is returned.  Returns 0 in every
+ * other case. */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0; /* at the limit: the negated test fails */
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/*
+ * Step 2 (Snowball source lines 89-111).  Looks up the ending in table a_4
+ * (backwards), requires r_R1, and then rewrites the slice according to the
+ * table result.  Results 1..15 name a replacement string; result 13 first
+ * requires a preceding 'l'; result 16 requires in_grouping_b(g_valid_LI)
+ * and deletes the slice instead.  Returns 0 when the lookup or one of the
+ * tests fails, 1 otherwise.
+ */
+static int r_Step_2(struct SN_env * z) {
+    symbol * repl = 0;    /* replacement text selected below */
+    int repl_len = 0;     /* its length in symbols */
+    int which;
+
+    z->ket = z->c; /* [, line 89 */
+    which = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (which == 0) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+
+    switch (which) {
+        case 1:  repl = s_13; repl_len = 4; break; /* line 90 */
+        case 2:  repl = s_14; repl_len = 4; break; /* line 91 */
+        case 3:  repl = s_15; repl_len = 4; break; /* line 92 */
+        case 4:  repl = s_16; repl_len = 4; break; /* line 93 */
+        case 5:  repl = s_17; repl_len = 3; break; /* line 94 */
+        case 6:  repl = s_18; repl_len = 3; break; /* line 96 */
+        case 7:  repl = s_19; repl_len = 3; break; /* line 98 */
+        case 8:  repl = s_20; repl_len = 2; break; /* line 100 */
+        case 9:  repl = s_21; repl_len = 3; break; /* line 101 */
+        case 10: repl = s_22; repl_len = 3; break; /* line 103 */
+        case 11: repl = s_23; repl_len = 3; break; /* line 105 */
+        case 12: repl = s_24; repl_len = 3; break; /* line 107 */
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            repl = s_26; repl_len = 2; /* line 108 */
+            break;
+        case 14: repl = s_27; repl_len = 3; break; /* line 109 */
+        case 15: repl = s_28; repl_len = 4; break; /* line 110 */
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            return 1;
+        default:
+            return 1; /* any other result: nothing to rewrite */
+    }
+    slice_from_s(z, repl_len, repl); /* <- */
+    return 1;
+}
+
+/*
+ * Step 3 (Snowball source lines 116-125).  Looks up the ending in table a_5
+ * (backwards), requires r_R1, and then: results 1..4 replace the slice with
+ * a shorter string, result 5 deletes it, result 6 deletes it only when r_R2
+ * also holds.  Returns 0 when the lookup or a region test fails, else 1.
+ */
+static int r_Step_3(struct SN_env * z) {
+    symbol * repl = 0;    /* replacement text for results 1..4 */
+    int repl_len = 0;
+    int which;
+
+    z->ket = z->c; /* [, line 116 */
+    which = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (which == 0) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+
+    switch (which) {
+        case 1: repl = s_29; repl_len = 4; break; /* line 117 */
+        case 2: repl = s_30; repl_len = 3; break; /* line 118 */
+        case 3: repl = s_31; repl_len = 2; break; /* line 119 */
+        case 4: repl = s_32; repl_len = 2; break; /* line 121 */
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            return 1;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            return 1;
+        default:
+            return 1; /* any other result: nothing to rewrite */
+    }
+    slice_from_s(z, repl_len, repl); /* <- */
+    return 1;
+}
+
+/*
+ * Step 4 (Snowball source lines 130-134).  Looks up the ending in table a_6
+ * (backwards), requires r_R2, and deletes the slice.  For result 2 ("ion")
+ * the deletion additionally requires the preceding symbol to be 's' or 't'.
+ * Returns 0 when the lookup or a test fails, 1 otherwise.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int which;
+
+    z->ket = z->c; /* [, line 130 */
+    which = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (which == 0) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+
+    if (which == 2) {
+        int back = z->l - z->c; /* or, line 134 */
+        if (!(eq_s_b(z, 1, s_33))) {
+            z->c = z->l - back;  /* retry the alternative from the same spot */
+            if (!(eq_s_b(z, 1, s_34))) return 0;
+        }
+    }
+    if (which == 1 || which == 2) {
+        slice_del(z); /* delete, lines 133 / 134 */
+    }
+    return 1;
+}
+
+/*
+ * Step 5 (Snowball source lines 139-141).  Looks up the final letter in
+ * table a_7 (backwards):
+ *   result 1 ('e')  delete it when r_R2 holds, or when r_R1 holds and
+ *                   r_shortv does not;
+ *   result 2 ('l')  delete it when r_R2 holds and another 'l' precedes it.
+ * Returns 0 when the lookup or a required test fails, 1 otherwise.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int which;
+
+    z->ket = z->c; /* [, line 139 */
+    which = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (which == 0) return 0;
+    z->bra = z->c; /* ], line 139 */
+
+    if (which == 1) {
+        int back = z->l - z->c; /* or, line 140 */
+        if (!r_R2(z)) { /* call R2, line 140 */
+            z->c = z->l - back;
+            if (!r_R1(z)) return 0; /* call R1, line 140 */
+            {   int probe = z->l - z->c; /* not, line 140 */
+                if (r_shortv(z)) return 0; /* call shortv, line 140 */
+                z->c = z->l - probe;
+            }
+        }
+        slice_del(z); /* delete, line 140 */
+    } else if (which == 2) {
+        if (!r_R2(z)) return 0; /* call R2, line 141 */
+        if (!(eq_s_b(z, 1, s_35))) return 0;
+        slice_del(z); /* delete, line 141 */
+    }
+    return 1;
+}
+
+/*
+ * Second exception list (Snowball source line 147).  Returns 1 when table
+ * a_8 matches backwards from the cursor and the match reaches the backward
+ * limit z->lb; returns 0 otherwise.
+ */
+static int r_exception2(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c; /* [, line 147 */
+    found = find_among_b(z, a_8, 8); /* substring, line 147 */
+    if (found == 0) return 0;
+    z->bra = z->c; /* ], line 147 */
+    return (z->c > z->lb) ? 0 : 1; /* atlimit, line 147 */
+}
+
+/*
+ * First exception list (Snowball source lines 159-176).  Returns 1 when
+ * table a_9 matches forwards from the cursor and the match ends at the
+ * limit z->l.  Table results 1..11 replace the slice with a fixed string;
+ * any other non-zero result leaves the word unchanged.  Returns 0 when
+ * there is no such match.
+ */
+static int r_exception1(struct SN_env * z) {
+    /* replacement text and length, indexed by table result (slot 0 unused) */
+    static symbol * const repl_text[12] = {
+        0, s_36, s_37, s_38, s_39, s_40, s_41, s_42, s_43, s_44, s_45, s_46
+    };
+    static const int repl_len[12] = {
+        0, 3, 3, 3, 3, 3, 3, 5, 4, 5, 4, 5
+    };
+    int which;
+
+    z->bra = z->c; /* [, line 159 */
+    which = find_among(z, a_9, 18); /* substring, line 159 */
+    if (which == 0) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+
+    if (which >= 1 && which <= 11) {
+        slice_from_s(z, repl_len[which], repl_text[which]); /* <-, lines 163-176 */
+    }
+    return 1;
+}
+/* Postlude (Snowball source line 192).  Returns 0 immediately when the
+ * Y_found flag z->B[0] is clear.  Otherwise scans forward from the cursor
+ * and rewrites every 'Y' it finds back to 'y' with slice_from_s(), stopping
+ * when the scan reaches the limit z->l, and returns 1. */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c; /* cursor at the start of this repetition */
+        while(1) { /* goto, line 192 */
+            int c = z->c; /* shadows the outer c: position being probed */
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0; /* no further 'Y': leave the repeat loop */
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+/* Entry point: stem the word currently held in z, in place.
+ * Order of work: r_exception1 first; if that fails, the word must have at
+ * least 3 symbols after the cursor (otherwise 0 is returned and nothing is
+ * changed), then r_prelude and r_mark_regions run forwards, the direction
+ * is switched to backwards, r_Step_1a runs, then either r_exception2
+ * matches or r_Step_1b .. r_Step_5 run in turn, and finally r_postlude runs
+ * forwards again.  Returns 1 in every case except the short-word exit. */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0; /* fewer than 3 symbols available: give up */
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m; /* positions are kept as distances from z->l while the word is edited */
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb; /* back to forward mode: cursor returns to the saved start */
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/* Create an environment sized for this stemmer: no string slots, two
+   integer slots (the region marks) and one boolean slot (Y_found). */
+extern struct SN_env * english_create_env(void)
+{
+    return SN_create_env(0, 2, 1);
+}
+
+/* Release an environment obtained from english_create_env(). */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+/* Create / release the environment used by the English stemmer. */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+/* Stem the current word of z in place; returns 0 only for words too short to process, 1 otherwise. */
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>    /* INT_MAX, INT_MIN */
+
+#include "api.h"
+
+/* Aliases for the integer extremes from <limits.h>. */
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Size in bytes of the two ints stored immediately in front of a symbol
+   buffer (read through CAPACITY and SIZE below).  Parenthesised so the
+   macro stays a single operand inside larger expressions. */
+#define HEAD (2*sizeof(int))
+
+/* Accessors for those two ints: SIZE is the int directly before the
+   buffer, CAPACITY the one before that. */
+#define SIZE(p)        ((int *)(p))[-1]
+#define SET_SIZE(p, n) ((int *)(p))[-1] = (n)
+#define CAPACITY(p)    ((int *)(p))[-2]
+/* One entry of a lookup table searched by find_among() / find_among_b(). */
+struct among
+{   int s_size;     /* number of chars in string */
+    symbol * s;       /* search string (s_size symbols, no terminator in the generated tables) */
+    int substring_i;/* index to longest matching substring; -1 in the generated tables when there is none */
+    int result;     /* result of the lookup, handed back to the caller of find_among[_b] */
+    int (* function)(struct SN_env *); /* always 0 in the tables visible here - presumably an optional extra test, TODO confirm */
+};
+/* Allocation and release of symbol buffers. */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+/* Grouping tests at the cursor; the generated stemmers call the _b forms while working backwards. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+/* Range tests; not called by the code visible in this chunk. */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+/* String comparison at the cursor against a literal (eq_s*) or a symbol buffer (eq_v*). */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+/* Table lookup over v_size among entries; callers treat 0 as "no match" and otherwise use the value as the entry's result. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+/* Editing of the current word: replace a range, or replace / delete the bra..ket slice. */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+/* Insertion of a literal or a symbol buffer over the range bra..ket. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+/* Copying out of the current word; not called by the code visible in this chunk. */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+/* Debugging aid; not called by the code visible in this chunk. */
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+/* Public entry point: stems the word held in z in place. */
+extern int russian_stem(struct SN_env * z);
+/* Internal routines of the stemmer; each returns 1 on success and 0 on
+   failure. */
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+/* Creation / destruction of the stemmer environment. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/*
+ * a_0: ending table searched backwards by r_perfective_gerund().
+ * Each entry is { s_size, s, substring_i, result, function }: length and
+ * bytes of the ending, index of the fallback entry tried when this one does
+ * not fully match (-1 for none), the value find_among_b() returns for it
+ * (selects the switch case in the caller), and an optional test routine
+ * (0 = none).
+ * NOTE(review): the byte values look like KOI8-R Cyrillic -- confirm.
+ */
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+/*
+ * a_1: ending table searched backwards by r_adjective().  Every entry has
+ * result 1 and no fallback entry (substring_i == -1), so any match leads to
+ * the same action in the caller.
+ */
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+/*
+ * a_2: ending table searched backwards by r_adjectival() after an adjective
+ * ending has been removed.  Result 1 entries are deleted only when preceded
+ * by s_2 or s_3; result 2 entries are deleted unconditionally.
+ */
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+/* a_3: the two endings searched backwards (and deleted) by r_reflexive(). */
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+/*
+ * a_4: ending table searched backwards by r_verb().  Result 1 entries are
+ * deleted only when preceded by s_4 or s_5; result 2 entries are deleted
+ * unconditionally.  substring_i links an entry to the shorter entry that is
+ * tried when the longer one does not fully match.
+ */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+/*
+ * a_5: ending table searched backwards by r_noun().  Every entry has
+ * result 1, so any match is deleted by the caller; substring_i links an
+ * entry to the shorter entry tried when the longer one does not fully match.
+ */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* a_6: the two endings searched backwards by r_derivational(); a match is
+   deleted only if r_R2() also succeeds. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* a_7: ending table searched backwards by r_tidy_up(); the result value
+   (1, 2 or 3) selects which clean-up case the caller applies. */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Character-class bitmap used by r_mark_regions() with the range 192..220:
+   bit (ch - 192) is set when symbol ch belongs to the class.
+   NOTE(review): presumably the vowel class of the algorithm -- confirm. */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* One-symbol literals compared with eq_s_b() by the routines below. */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/*
+ * Compute the region marks I[0] (pV) and I[1] (p2).
+ *
+ * Both marks default to the end of the word (z->l).  Scanning forward from
+ * the cursor, pV is set just past the first symbol that is in g_v; p2 is set
+ * after a further out-of-g_v symbol, an in-g_v symbol and another
+ * out-of-g_v symbol have each been passed.  If the word ends before a scan
+ * completes, the marks assigned so far are kept.  The cursor is restored
+ * before returning.  Always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    int start = z->c;
+
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+
+    /* gopast a symbol in g_v */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto done;
+        z->c++;
+    }
+    z->I[0] = z->c; /* setmark pV */
+
+    /* gopast a symbol not in g_v */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto done;
+        z->c++;
+    }
+    /* gopast a symbol in g_v */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto done;
+        z->c++;
+    }
+    /* gopast a symbol not in g_v */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto done;
+        z->c++;
+    }
+    z->I[1] = z->c; /* setmark p2 */
+
+done:
+    z->c = start;
+    return 1;
+}
+
+/* Region test: returns 1 when the cursor is at or after mark I[1], else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/*
+ * Remove an ending listed in a_0, searching backwards from the cursor.
+ *
+ * Result-1 endings are deleted only when the symbol before them is s_0 or
+ * s_1; result-2 endings are deleted unconditionally.
+ * Returns 1 if an ending was removed, 0 otherwise.
+ */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_0, 9);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        int mark = z->l - z->c;
+        if (!eq_s_b(z, 1, s_0)) {
+            /* first alternative failed: rewind and try the second */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z);
+    } else if (among_var == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/*
+ * Delete an ending listed in a_1, searching backwards from the cursor.
+ * Returns 1 if an ending matched, 0 otherwise.
+ */
+static int r_adjective(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_1, 26);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/*
+ * Remove an adjective ending (r_adjective) and then, optionally, a further
+ * ending listed in a_2.
+ *
+ * The a_2 step is a "try": result-1 endings are deleted only when preceded
+ * by s_2 or s_3, result-2 endings are deleted unconditionally, and when the
+ * step fails the cursor is put back where it was before the step started.
+ * Returns 0 if r_adjective() fails, 1 otherwise.
+ *
+ * The two saved positions have distinct names (m_keep for the "try", m1 for
+ * the inner "or").  Previously both were called m, so the failure path of
+ * the inner "or" restored the cursor to the inner position instead of the
+ * position saved for the whole "try".
+ */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m_keep = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m_keep; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m_keep; goto lab0; }
+            case 1:
+                {   int m1 = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m1;
+                    /* whole "try" fails: rewind to its starting point */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_keep; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/*
+ * Delete an ending listed in a_3, searching backwards from the cursor.
+ * Returns 1 if an ending matched, 0 otherwise.
+ */
+static int r_reflexive(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_3, 2);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/*
+ * Remove an ending listed in a_4, searching backwards from the cursor.
+ *
+ * Result-1 endings are deleted only when the symbol before them is s_4 or
+ * s_5; result-2 endings are deleted unconditionally.
+ * Returns 1 if an ending was removed, 0 otherwise.
+ */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_4, 46);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        int mark = z->l - z->c;
+        if (!eq_s_b(z, 1, s_4)) {
+            /* first alternative failed: rewind and try the second */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z);
+    } else if (among_var == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/*
+ * Delete an ending listed in a_5, searching backwards from the cursor.
+ * Returns 1 if an ending matched, 0 otherwise.
+ */
+static int r_noun(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_5, 36);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/*
+ * Delete an ending listed in a_6, provided the ending starts at or after
+ * mark I[1] (checked with r_R2 once the cursor sits at its start).
+ * Returns 1 if an ending was removed, 0 otherwise.
+ */
+static int r_derivational(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_6, 2);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (!r_R2(z)) return 0;
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/*
+ * Final clean-up driven by table a_7.
+ *
+ *   result 1: delete the ending; then, if the text now ends in s_6 preceded
+ *             by s_7, delete that trailing s_6 as well (otherwise fail).
+ *   result 2: delete the ending only if it is preceded by s_8.
+ *   result 3: delete the ending.
+ *
+ * Returns 1 on success, 0 if nothing matched or a required context test
+ * failed.
+ */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_7, 4);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        slice_del(z);
+        z->ket = z->c;
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c;
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z);
+    } else if (among_var == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z);
+    } else if (among_var == 3) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/*
+ * Stem the word held in z, in place.
+ *
+ * Steps: compute the region marks, switch to backward processing with the
+ * backward limit raised to mark I[0], then
+ *   1. remove a perfective gerund ending, or else optionally a reflexive
+ *      ending followed by the first of adjectival / verb / noun that applies;
+ *   2. optionally remove a trailing s_9 symbol;
+ *   3. run r_derivational and r_tidy_up.
+ * Finally the original backward limit is restored and the cursor is set to it.
+ *
+ * Returns 1 normally; returns 0 (leaving z in backward mode) if the cursor
+ * is before mark I[0] when the limit is applied.
+ */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        /* m3 remembers the old backward limit; it is restored below */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        /* NOTE: each nested block declares its own m that shadows the outer
+           one; every "z->c = z->l - m" uses the innermost m in scope */
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            /* end of "do": the cursor is rewound whether or not it succeeded */
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3;
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create a stemmer environment via SN_create_env(0, 2, 0); the stemmer
+   itself uses two integer slots, I[0] and I[1]. */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Release an environment obtained from russian_create_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Create / destroy the environment used by russian_stem(). */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Stem the word held in z in place. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* "unless (C) stmt" runs stmt when C is false. */
+#define unless(C) if(!(C))
+
+/* Capacity, in symbols, of a buffer returned by create_s(). */
+#define CREATE_SIZE 1
+
+/*
+ * Allocate a new symbol buffer with capacity CREATE_SIZE.
+ *
+ * HEAD bytes are reserved in front of the returned pointer (presumably for
+ * the values accessed through CAPACITY()/SIZE(), which are defined outside
+ * this file); lose_s() undoes that offset before freeing.
+ *
+ * Returns NULL when the allocation fails.  Previously the malloc() result
+ * was offset and written through without being checked.
+ */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL) return NULL;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/*
+ * Free a symbol buffer obtained from create_s() or increase_size().
+ * A NULL pointer is ignored; without the check, free() would be handed the
+ * invalid address NULL - HEAD.
+ */
+extern void lose_s(symbol * p)
+{   if (p == NULL) return;
+    free((char *) p - HEAD);
+}
+
+/*
+ * Test whether the symbol at the cursor lies in min..max and has its bit set
+ * in bitmap s (bit index = symbol - min).  On success the cursor advances by
+ * one and 1 is returned; otherwise 0 is returned and the cursor is unchanged.
+ */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max || ch < min) return 0;
+    ch -= min;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward form of in_grouping(): tests the symbol just before the cursor
+ * and, on success, moves the cursor back by one.  Fails at the backward
+ * limit z->lb.
+ */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max || ch < min) return 0;
+    ch -= min;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/*
+ * Complement of in_grouping(): succeeds when the symbol at the cursor is
+ * outside min..max or its bit in bitmap s is clear.  On success the cursor
+ * advances by one and 1 is returned; otherwise 0.
+ */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max && ch >= min)
+    {   ch -= min;
+        if ((s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward form of out_grouping(): tests the symbol just before the cursor
+ * and, on success, moves the cursor back by one.  Fails at the backward
+ * limit z->lb.
+ */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max && ch >= min)
+    {   ch -= min;
+        if ((s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/*
+ * Succeeds when the symbol at the cursor lies in min..max; the cursor then
+ * advances by one.  Returns 1 on success, 0 otherwise.
+ */
+extern int in_range(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch < min || ch > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward form of in_range(): tests the symbol just before the cursor and,
+ * on success, moves the cursor back by one.
+ */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch < min || ch > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/*
+ * Succeeds when the symbol at the cursor lies outside min..max; the cursor
+ * then advances by one.  Returns 1 on success, 0 otherwise.
+ */
+extern int out_range(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch >= min && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward form of out_range(): tests the symbol just before the cursor and,
+ * on success, moves the cursor back by one.
+ */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{   int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch >= min && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/*
+ * Compare the s_size symbols at s with the text starting at the cursor.
+ * On a match the cursor advances past them and 1 is returned; otherwise 0
+ * is returned and the cursor is unchanged.
+ */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{   int remaining = z->l - z->c;
+    if (remaining < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/*
+ * Backward form of eq_s(): compares the s_size symbols at s with the text
+ * ending at the cursor.  On a match the cursor moves back past them and 1
+ * is returned; otherwise 0 is returned and the cursor is unchanged.
+ */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{   int remaining = z->c - z->lb;
+    if (remaining < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* eq_s() against the whole contents of symbol buffer p. */
+extern int eq_v(struct SN_env * z, symbol * p)
+{   int p_size = SIZE(p);
+    return eq_s(z, p_size, p);
+}
+
+/* eq_s_b() against the whole contents of symbol buffer p. */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{   int p_size = SIZE(p);
+    return eq_s_b(z, p_size, p);
+}
+
+/*
+ * Look up the text starting at the cursor in table v (v_size entries).
+ *
+ * A binary search over v narrows down to a candidate entry (this assumes v
+ * is sorted by its strings).  Starting from that candidate, and following
+ * substring_i links, the first entry that is fully matched -- and whose
+ * function, if any, returns non-zero -- wins: the cursor is set just past
+ * the matched string and the entry's result is returned.  Returns 0 when
+ * the chain ends without a match.
+ */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    /* number of leading symbols already known to match at bounds i and j */
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        /* this i is a local index into w->s and shadows the search bound i */
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    /* walk from the candidate through its substring_i fallbacks */
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/*
+ * find_among_b is the backward counterpart of find_among: it looks up the
+ * text ending at the cursor in table v (v_size entries), comparing each
+ * entry from its last symbol towards its first and never reading before the
+ * backward limit z->lb.
+ *
+ * A binary search narrows down to a candidate entry (this assumes v is
+ * sorted by its strings as read backwards).  Starting from that candidate,
+ * and following substring_i links, the first entry that is fully matched --
+ * and whose function, if any, returns non-zero -- wins: the cursor is set
+ * just before the matched string and the entry's result is returned.
+ * Returns 0 when the chain ends without a match.
+ */
+
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    /* number of trailing symbols already known to match at bounds i and j */
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        /* this i is a local index into w->s and shadows the search bound i */
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            /* go round once more so that entry 0 gets inspected */
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    /* walk from the candidate through its substring_i fallbacks */
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/*
+ * Replace buffer p by a new buffer of capacity n + 20.
+ *
+ * CAPACITY(p) symbols are copied over, p is released, and the new buffer is
+ * returned.  Only the capacity is set on the new buffer; callers set the
+ * size themselves afterwards.
+ * NOTE(review): the malloc() result is not checked -- confirm how callers
+ * are expected to cope with allocation failure.
+ */
+extern symbol * increase_size(symbol * p, int n)
+{   int capacity = n + 20;
+    char * raw = (char *) malloc(HEAD + (capacity + 1) * sizeof(symbol));
+    symbol * grown = (symbol *) (HEAD + raw);
+
+    CAPACITY(grown) = capacity;
+    memmove(grown, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return grown;
+}
+
+/*
+ * Replace the symbols between c_bra and c_ket in z->p by the s_size symbols
+ * at s.
+ *
+ * When the length changes, the buffer is grown if necessary, the tail after
+ * c_ket is shifted, and the size, z->l and the cursor are adjusted: a cursor
+ * at or after c_ket moves with the tail, a cursor inside the replaced span
+ * is moved to c_bra.  Returns the change in length (new minus old).
+ */
+
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{   int adjustment = s_size - (c_ket - c_bra);
+
+    if (adjustment != 0)
+    {   int old_len = SIZE(z->p);
+        int new_len = adjustment + old_len;
+
+        if (new_len > CAPACITY(z->p)) z->p = increase_size(z->p, new_len);
+        memmove(z->p + c_ket + adjustment, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += adjustment;
+
+        if (z->c >= c_ket)
+            z->c += adjustment;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return adjustment;
+}
+
+/*
+ * Sanity-check the slice markers: requires
+ * 0 <= bra <= ket <= l <= SIZE(z->p).
+ * On violation a message is written to stderr, the buffer is dumped with
+ * debug(), and the process exits with status 1.
+ */
+static void slice_check(struct SN_env * z)
+{
+    int broken = z->bra < 0 ||
+                 z->bra > z->ket ||
+                 z->ket > z->l ||
+                 z->l > SIZE(z->p);   /* this last test could be removed */
+
+    if (!broken) return;
+
+    fprintf(stderr, "faulty slice operation:\n");
+    debug(z, -1, 0);
+    exit(1);
+}
+
+/* Replace the current slice z->bra .. z->ket by the s_size symbols at s,
+   after validating the slice markers. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the contents of symbol buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{   int p_size = SIZE(p);
+    slice_from_s(z, p_size, p);
+}
+
+/* Delete the current slice, i.e. replace it by an empty string. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, (symbol *) 0);
+}
+
+/*
+ * Replace the span bra..ket by the s_size symbols at s, then shift z->bra
+ * and z->ket by the change in length when they lie at or after bra.
+ */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{   int delta = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* insert_s() with the contents of symbol buffer p as the replacement. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/*
+ * Copy the current slice z->bra .. z->ket into p, growing p if needed.
+ * Returns p, which may have been reallocated.
+ */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{   int len;
+
+    slice_check(z);
+    len = z->ket - z->bra;
+    if (len > CAPACITY(p)) p = increase_size(p, len);
+    memmove(p, z->p + z->bra, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/*
+ * Copy the first z->l symbols of z->p into p, growing p if needed.
+ * Returns p, which may have been reallocated.
+ */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{   int count = z->l;
+
+    if (count > CAPACITY(p)) p = increase_size(p, count);
+    memmove(p, z->p, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/*
+ * Print the buffer to stdout with position markers: '{' at lb, '[' at bra,
+ * '|' at the cursor, ']' at ket and '}' at l.  Zero symbols are shown as
+ * '#'.  The "number (line n): [size]'" prefix is printed only when
+ * number >= 0; the closing quote and newline are always printed.
+ */
+extern void debug(struct SN_env * z, int number, int line_count)
+{   int limit = SIZE(z->p);
+    int pos = 0;
+
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+    while (pos <= limit)
+    {   if (pos == z->lb) printf("{");
+        if (pos == z->bra) printf("[");
+        if (pos == z->c) printf("|");
+        if (pos == z->ket) printf("]");
+        if (pos == z->l) printf("}");
+        if (pos < limit)
+        {   int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+        pos++;
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case the NUL-terminated string in place, byte by byte, using
+ * tolower(); returns the same pointer it was given.
+ */
+char*
+lowerstr(char *str) {
+   char *cur;
+
+   for(cur=str; *cur != '\0'; cur++)
+       *cur = tolower(*(unsigned char*)cur);
+   return str;
+}
+
+/*
+ * Release every word stored in the stop list and the pointer array itself,
+ * then zero the whole structure (this clears wordop as well).
+ *
+ * The array is freed exactly once, after its s->len elements; it is not
+ * NULL-terminated, so the element count alone bounds the loop.
+ */
+void
+freestoplist(StopList *s) {
+   if ( s->stop ) {
+       int i;
+
+       for(i=0; i<s->len; i++)
+           free(s->stop[i]);   /* free(NULL) is a no-op */
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Read a stop-word file, one word per line, into s.
+ *
+ * in  - text value holding the file name; NULL or empty leaves the list empty
+ * s   - list to fill; s->wordop, when set, is applied to every word read
+ *
+ * Blank lines are skipped.  Raises ERROR if the file cannot be opened or
+ * memory runs out; in the latter case everything read so far is released
+ * first.  Words and the array are allocated with malloc/strdup, not palloc.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t buflen=strlen(buf);
+
+           /*
+            * Strip the line terminator only when one is present: the last
+            * line of a file may lack it, and must not lose a real character.
+            */
+           while ( buflen>0 && ( buf[buflen-1]=='\n' || buf[buflen-1]=='\r' ) )
+               buf[--buflen] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /* hand the partial list over so that it really is freed */
+                   s->stop=stop;
+                   freestoplist(s);
+                   fclose(hin); 
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+    
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               /* hand the partial list over so that it really is freed */
+               s->stop=stop;
+               freestoplist(s);
+               fclose(hin); 
+               elog(ERROR,"Not enough memory");
+           }
+           if ( s->wordop ) 
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++; 
+       }
+       fclose(hin);
+       pfree(filename); 
+   }
+   s->stop=stop;
+} 
+
+/* qsort/bsearch comparator: a and b each point to a char* element;
+ * the strings they refer to are ordered with strcmp(). */
+static int
+comparestr(const void *a, const void *b) {
+   const char *lhs = *(char**)a;
+   const char *rhs = *(char**)b;
+
+   return strcmp( lhs, rhs );
+}
+
+/* Sort the stop words with comparestr() so that searchstoplist() can use
+ * bsearch(); an empty or unallocated list is left untouched. */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || !(s->len>0) )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is present in the stop list.
+ *
+ * s->wordop, when set, is applied to key first.  The lookup uses bsearch()
+ * with comparestr(), so the list is expected to have been sorted with
+ * sortstoplist() beforehand.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop ) 
+       key=(*(s->wordop))(key);
+
+   if ( !s->stop || !(s->len>0) )
+       return false;
+   if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
+       return true;
+   return false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the tsearch configuration whose pg_ts_cfg oid is id.
+ *
+ * Two saved SPI plans are used (prepared on first call):
+ *   1. fetch prs_name of the configuration;
+ *   2. fetch (tokid, dict_name) for every token type mapped by it, ordered
+ *      by tokid descending, so the first row carries the highest token id
+ *      and fixes the size of cfg->map (highest tokid + 1).
+ *
+ * While SPI is connected the parser name and dictionary names are only
+ * copied (into TopMemoryContext, so they outlive SPI_finish); they are
+ * translated to ids with name2id_prs()/name2id_dict() after SPI_finish()
+ * and the copies are then freed.
+ *
+ * cfg->map and each dict_id array are malloc'ed.  Errors are reported
+ * through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       /* keep a copy that survives SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second plan takes (parser name text, cfg oid) */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /* first row has the highest tokid: it determines the map size */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull refers to the dict_name column fetched last */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* store name copies for now; they become ids after SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       /* free the detoasted copy, if detoasting made one */
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every stored dictionary name by the dictionary's id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+typedef struct {
+   TSCfgInfo   *last_cfg;
+   int     len;
+   int     reallen;
+   TSCfgInfo   *list;
+   SNMap       name2id_map;
+} CFGList;
+
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name-to-id map, every cached
+ * TSCfgInfo with its per-token-type dictionary arrays, and the list itself.
+ * CList is zeroed afterwards, so the next findcfg() starts from scratch.
+ */
+void
+reset_cfg(void) {
+        freeSNMap( &(CList.name2id_map) );
+        if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               /* map has one ListDictionary per token type */
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+                free(CList.list);
+   }
+        memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort/bsearch comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly rather than subtracted, so the result
+ * does not depend on how a difference of two ids converts to int.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached configuration with the given id, loading it with
+ * init_cfg() on first use.
+ *
+ * Lookup order: the most recently returned entry, then a binary search of
+ * the id-sorted cache, then a fresh load appended to the cache (which is
+ * grown by doubling and re-sorted).  The returned pointer refers into
+ * CList.list, which may be reallocated or re-sorted by a later call.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+
+   /*
+    * Do not publish the new slot through last_cfg until it is complete:
+    * init_cfg() can raise an error after it has already stored the id, and
+    * a later lookup must not return that half-initialized entry.
+    */
+   CList.last_cfg=NULL;
+   init_cfg(id, &(CList.list[CList.len]));
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return findcfg(id); /* qsort changed order!! */
+}
+
+
+/*
+ * Translate a configuration name (pg_ts_cfg.ts_name) into its oid.
+ *
+ * The name-to-id map in CList is consulted first; on a miss the oid is
+ * fetched through a saved SPI plan and remembered in the map.  Raises
+ * ERROR when the query fails, finds no row, or yields a null oid.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid cfgid;
+
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid ) 
+       return cfgid;
+   
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( !plan_name2id ) 
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+
+   if ( !( SPI_processed > 0 ) )
+       elog(ERROR, "No tsearch config");
+   else {
+       cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull ) 
+           elog(ERROR, "Null id for tsearch config");
+   }
+   SPI_finish();
+
+   /* remember the answer for subsequent calls */
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Split buf into lexemes with the configuration's parser and append the
+ * normalized forms to prs->words.
+ *
+ * cfg     - configuration supplying the parser and the per-token-type
+ *           dictionary lists
+ * prs     - output; prs->words is doubled with repalloc when full
+ * buf     - text to parse, buflen bytes long
+ *
+ * For every lexeme the dictionaries mapped to its token type are tried in
+ * order; the first one returning a non-NULL result wins.  Its result is a
+ * NULL-terminated array of strings: every string is stored (by pointer,
+ * not copied) as a word at the same position, and only the array itself
+ * is freed.  An empty array therefore consumes a position but adds no word.
+ * Raises ERROR for lexemes of MAXSTRLEN bytes or more.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append a zero-initialized HLWORD holding a private copy of the buflen
+ * bytes at buf and the given token type.  prs->words is doubled with
+ * repalloc until there is room for the new entry.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* the array is stable from here on, so a pointer into it is safe */
+   slot = prs->words + prs->curwords;
+   memset( slot, 0, sizeof(HLWORD) ); 
+   slot->type = (uint8)type;
+   slot->len = buflen; 
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;    
+}
+
+/*
+ * Link the most recently added word (prs->words[curwords-1]) to every VAL
+ * item of the query whose operand equals the buflen bytes at buf.
+ *
+ * The first matching item is stored in the word itself.  Each further
+ * match appends a copy of the word flagged as repeated and pointing at
+ * that item; the copy shares the original's word pointer.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* reserve room for the worst case: one extra entry per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc above may have moved the array,
+    * which would leave a pointer obtained earlier dangling.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Parse buf for headline generation.
+ *
+ * Every lexeme returned by the configuration's parser is appended verbatim
+ * to prs->words via hladdword().  It is then normalized by the first
+ * dictionary of its token type that returns a non-NULL result, and each
+ * normalized form is matched against the query with hlfinditem().  The
+ * normalized strings and the array holding them are freed here.
+ * Raises ERROR for lexemes of MAXSTRLEN bytes or more.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       /* token types beyond the map have no dictionaries */
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from the words collected in prs.
+ *
+ * Only words flagged "in" and neither "skip" nor "repeated" are emitted:
+ * a "replace" word becomes a single space, any other word is copied and,
+ * when "selected", wrapped in prs->startsel / prs->stopsel.  The word
+ * buffer of every non-repeated entry is freed as it is passed.
+ * Returns a palloc'ed text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int     alloclen=128;
+   text    *out=(text*)palloc( alloclen );
+   char    *cur=((char*)out) + VARHDRSZ;
+   HLWORD  *wrd;
+
+   for( wrd=prs->words; wrd - prs->words < prs->curwords; wrd++ ) {
+       /* grow until the word plus both markers is sure to fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (cur - ((char*)out)) >= alloclen ) {
+           int used = cur - ((char*)out);
+           alloclen*= 2;
+           out = (text *) repalloc(out, alloclen);
+           cur=((char*)out) + used;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( !wrd->replace ) {
+               if (wrd->selected) {
+                   memcpy(cur,prs->startsel,prs->startsellen);
+                   cur+=prs->startsellen;
+               }
+               memcpy(cur,wrd->word,wrd->len);
+               cur+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(cur,prs->stopsel,prs->stopsellen);
+                   cur+=prs->stopsellen;
+               }
+           } else {
+               *cur=' ';
+               cur++;
+           }
+       }
+
+       /* repeated entries share their word buffer with the original */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+   }
+
+   VARATT_SIZEP(out)=cur - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the id of the current configuration.
+ *
+ * Once current_cfg_id is set it is returned directly.  Otherwise the
+ * configuration whose pg_ts_cfg.locale equals the current LC_CTYPE locale
+ * name is looked up through a saved SPI plan and remembered.  Raises
+ * ERROR when the query fails or no configuration matches.
+ */
+int  
+get_currcfg(void) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1];
+   const char *locname;
+   bool isnull;
+   int rc;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* setlocale() with a NULL locale only queries the current setting */
+   locname = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)locname) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !( SPI_processed > 0 ) )
+       elog(ERROR,"Can't find tsearch config by locale");
+   else 
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid current.
+ * findcfg() is called first, so the configuration is loaded into the
+ * cache before the id is recorded.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid = PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id = cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name current.
+ * The name is resolved with name2id_cfg() and the work is delegated to
+ * set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Oid cfgid=name2id_cfg(name);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum( cfgid ) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* SQL-callable: return the id reported by get_currcfg() as an oid. */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   int cfgid = get_currcfg();
+
+   PG_RETURN_OID( cfgid );
+}
+
+/*
+ * SQL-callable: report "TSearch cache cleaned" at NOTICE level through
+ * ts_error() and return void.
+ *
+ * NOTE(review): this function does not call reset_cfg() itself; presumably
+ * ts_error() clears the caches as a side effect -- confirm against its
+ * definition before relying on this function to invalidate anything.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/*
+ * Dictionaries assigned to one token type.
+ *   len     - number of entries in dict_id
+ *   dict_id - malloc'ed array; once init_cfg() has finished each entry
+ *             holds a dictionary id stored as a Datum
+ */
+typedef struct {
+   int len;
+   Datum   *dict_id;
+} ListDictionary;
+
+/*
+ * One cached tsearch configuration, filled in by init_cfg().
+ *   id     - oid of the configuration row in pg_ts_cfg
+ *   prs_id - id of the configuration's parser
+ *   len    - number of entries in map (highest mapped token id + 1)
+ *   map    - malloc'ed array indexed by token type
+ */
+typedef struct {
+   Oid id;
+   Oid prs_id;
+   int len;
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/*
+ * One normalized word produced by parsetext_v2().
+ *   len     - length of word in bytes
+ *   pos.pos - position of the word in the text, limited by LIMITPOS
+ *   word    - the normalized string
+ *   alen    - set to 0 by parsetext_v2()
+ * NOTE(review): pos.apos and alen look like they serve a later stage that
+ * collects several positions per word -- confirm against their users.
+ */
+typedef struct {
+        uint16          len;
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        char       *word;
+   uint32  alen;
+}       WORD;
+   
+/*
+ * Growing array of WORDs filled by parsetext_v2().
+ *   words    - palloc'ed array, doubled with repalloc when full
+ *   lenwords - allocated number of entries
+ *   curwords - number of entries in use
+ *   pos      - running position counter, advanced once per lexeme that a
+ *              dictionary recognized
+ */
+typedef struct {
+        WORD       *words;
+        int4            lenwords;
+        int4            curwords;
+   int4        pos;
+}       PRSTEXT;
+
+/*
+ * One lexeme of the text a headline is generated from.
+ *   len      - length of word in bytes
+ *   selected - genhl() wraps the word in startsel/stopsel
+ *   in       - genhl() emits the word only when this is set
+ *   skip     - genhl() omits the word
+ *   replace  - genhl() emits a single space instead of the word
+ *   repeated - extra entry created by hlfinditem() for a further matching
+ *              query item; it shares word with the original entry
+ *   type     - token type reported by the parser
+ *   word     - palloc'ed copy of the lexeme (not NUL-terminated)
+ *   item     - matching query item, or NULL when none matched
+ */
+typedef struct {
+        uint16    len;
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   uint8   type;
+        char      *word;
+   ITEM      *item;
+}       HLWORD;
+   
+/*
+ * Growing array of HLWORDs plus the markers used to highlight words.
+ *   words       - palloc'ed array, doubled with repalloc when full
+ *   lenwords    - allocated number of entries
+ *   curwords    - number of entries in use
+ *   startsel    - text emitted before a selected word (startsellen bytes)
+ *   stopsel     - text emitted after a selected word (stopsellen bytes)
+ */
+typedef struct {
+        HLWORD       *words;
+        int4            lenwords;
+        int4            curwords;
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+
+/*
+ * Input function for tsstat.  The argument is not examined: the result is
+ * always a freshly palloc'd, header-only tsstat with zero entries.
+ */
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty=(tsstat*)palloc(STATHDRSIZE);
+   empty->size=0;
+   empty->len=STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+/*
+ * Output function for tsstat: not implemented, it reports
+ * elog(ERROR,"Unimplemented") on every call.  The PG_RETURN_NULL() after
+ * the elog gives the function a return statement.
+ */
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * Create or grow an array of WordEntry pointers.
+ *
+ * in  - current array, or NULL when nothing is allocated yet
+ * len - in/out: current capacity; updated to the new capacity
+ *
+ * A NULL array or a zero capacity yields a fresh array of 8 slots;
+ * otherwise the capacity is doubled and the array is repalloc'd.
+ * Returns the array to use from now on.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       *len = (*len) * 2;
+       return repalloc( in, sizeof(WordEntry*)* (*len) );
+   }
+
+   *len=8;
+   return palloc( sizeof(WordEntry*)* (*len) );
+}
+
+/*
+ * Order a stat entry against a word of a tsvector.
+ * Lexemes of different length are ordered by length (shorter first);
+ * lexemes of equal length are ordered by strncmp() over that length.
+ * Returns a negative value, 0 or a positive value.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(
+       STATSTRPTR(stat) + a->pos,
+       STRPTR(txt) + b->pos,
+       a->len
+   );
+}
+
+/*
+ * Build a new tsstat holding every entry of 'stat' plus the 'len' words of
+ * 'txt' that 'entry' points at.
+ *
+ * stat  - existing statistics; only read here
+ * txt   - tsvector the new words belong to
+ * entry - pointers to the WordEntry items of txt that must be inserted
+ * len   - number of elements in entry
+ *
+ * The string area of stat is copied unchanged and the new lexemes are
+ * appended behind it, so the pos offsets of the old entries stay valid.
+ * New entries start with ndoc=1 and nentry=POSDATALEN (or 1 when that is 0).
+ * Both the entries of stat and the words in 'entry' have to be ordered
+ * according to compareStatWord(); the binary search and the merge below
+ * rely on it.  Returns the newly palloc'd tsstat; stat is not freed.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* bytes of lexeme text contributed by the new words */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   /* size must be set before STATSTRPTR(newstat) is used below */
+   newstat->size=nentry;
+
+   /* old strings keep their offsets; new ones are appended at curptr */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* one new word: binary search for its slot, then copy around it */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       /* entries before the slot */
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       /* the new entry itself */
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       /* entries after the slot */
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* at most one of the two tails below is non-empty */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+/*
+ * Accumulation step: fold the words of the tsvector in argument 1 into the
+ * running statistics in argument 0 and return the resulting tsstat.
+ *
+ * Words already present in stat are updated in place: ndoc is incremented
+ * and nentry grows by POSDATALEN (or by 1 when that is 0).  Words not yet
+ * present are collected in newentry and added by formstat(), which builds
+ * a new tsstat.  When there is nothing new, the stat pointer itself is
+ * returned.  The old stat is not freed here (see the commented-out pfree).
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both lists in parallel while stat holds fewer
+    * than 100 entries per word of txt, otherwise binary-search stat for
+    * each word of txt.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word of txt is missing from stat: remember it */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* stat is exhausted: every remaining word of txt is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   /* leaves StopLow < StopHigh, which marks "found" below */
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   /* newentry still points into txt, so txt is freed only after formstat() */
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * State of the set-returning functions below, kept in funcctx->user_fctx:
+ * cur is the index of the next entry to return, stat the copy of the
+ * statistics made by ts_setup_firstcall().
+ * NOTE(review): stat is declared tsvector* although a tsstat is stored in
+ * it; it is only accessed through ->size and the STAT* macros -- consider
+ * declaring it tsstat*.
+ */
+typedef struct {
+   uint32  cur;
+   tsvector *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions.
+ *
+ * Inside funcctx->multi_call_memory_ctx this copies 'stat' (stat->len
+ * bytes) into a new StatStorage with cur=0, stores it in user_fctx, and
+ * prepares the tuple slot and attribute metadata for the "statinfo" row
+ * type.  The previous memory context is restored before returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext   saved;
+   StatStorage     *storage;
+   TupleDesc       desc;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   /* private copy of the statistics, owned by the multi-call context */
+   storage=palloc( sizeof(StatStorage) );
+   storage->cur=0;
+   storage->stat=palloc( stat->len );
+   memcpy(storage->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)storage;
+
+   /* output row description */
+   desc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+
+/*
+ * Produce the next result row of a set-returning call.
+ *
+ * While entries remain, builds a tuple (word, ndoc, nentry) from the entry
+ * at index cur, advances cur and returns the tuple as a Datum.  Once all
+ * entries have been returned, frees the StatStorage together with its stat
+ * copy and returns (Datum)0.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *storage=(StatStorage*)funcctx->user_fctx;
+   StatEntry   *item;
+   HeapTuple   tup;
+   Datum       res;
+   char        *cols[3];
+   char        ndocbuf[16];
+   char        nentrybuf[16];
+
+   /* nothing left: release the state and signal the end */
+   if ( storage->cur >= storage->stat->size ) {
+       pfree(storage->stat);
+       pfree(storage);
+       return (Datum)0;
+   }
+
+   item=STATPTR(storage->stat) + storage->cur;
+
+   /* column 0: the lexeme as a NUL-terminated copy */
+   cols[0]=palloc( item->len+1 );
+   memcpy( cols[0], STATSTRPTR(storage->stat)+item->pos, item->len);
+   cols[0][item->len]='\0';
+
+   /* columns 1 and 2: the counters in text form */
+   sprintf(ndocbuf,"%d",item->ndoc);
+   cols[1]=ndocbuf;
+   sprintf(nentrybuf,"%d",item->nentry);
+   cols[2]=nentrybuf;
+
+   tup = BuildTupleFromCStrings(funcctx->attinmeta, cols);
+   res = TupleGetDatum(funcctx->slot, tup);
+
+   pfree(cols[0]);
+   storage->cur++;
+   return res;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function over a tsstat (argument 0).  The first call copies
+ * the statistics through ts_setup_firstcall(); every call then returns the
+ * next row from ts_process_call() until that yields (Datum)0, which ends
+ * the set.
+ */
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Oid of the tsvector type; InvalidOid until get_ti_Oid() has filled it in */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the Oid of the type named 'tsvector' in pg_type via SPI and store
+ * it in tiOid.  Raises an ERROR when the query fails, when it returns no
+ * row, or when the Oid found is InvalidOid.  The caller must already hold
+ * an SPI connection (ts_stat() connects before ts_stat_sql() gets here).
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is an unsigned row count, so "< 0" could never fire;
+    * an empty result has to be caught before vals[0] is read below.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL text 'txt' through an SPI cursor and accumulate word
+ * statistics over its result with ts_accum().
+ *
+ * The query has to return exactly one column, of type tsvector; anything
+ * else raises an ERROR.  Rows are fetched 100 at a time and NULL values are
+ * skipped.  An SPI connection must already be open (ts_stat() calls
+ * SPI_connect() first).  Returns the accumulated tsstat.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   /* the tsvector type Oid is looked up once and kept in tiOid */
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from empty statistics */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       /* fold every row of the current batch into stat */
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum() hands back either stat itself or a new tsstat */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: argument 0 is the text of an SQL query.  On the
+ * first call the query is run and its result accumulated by ts_stat_sql()
+ * inside an SPI connection; ts_setup_firstcall() copies the statistics
+ * into the multi-call context before SPI_finish().  Every call then
+ * returns the next row from ts_process_call() until the set is exhausted.
+ */
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one lexeme, as filled in by ts_stat.c:
+ *   len    - length of the lexeme text in bytes
+ *   pos    - offset of the lexeme text from the start of the string area
+ *   ndoc   - incremented once for every tsvector that contains the lexeme
+ *   nentry - grows by the lexeme's position count in each such tsvector
+ *            (by 1 when the tsvector holds no positions for it)
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Accumulated statistics in one contiguous allocation:
+ *   len  - total size of the allocation in bytes (see CALCSTATSIZE)
+ *   size - number of StatEntry items
+ *   data - start of the payload: 'size' StatEntry items followed by the
+ *          string area holding the lexeme texts (see STATPTR, STATSTRPTR)
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/* size of the fixed part of a tsstat: the len and size fields */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* bytes needed for x entries plus lenstr bytes of lexeme text */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* first StatEntry of the tsstat x */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* start of the string area, located right behind the entry array */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* number of bytes in the string area */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+-- One row per dictionary.  The rows inserted by this script store pg_proc
+-- oids in dict_init and dict_lexize, and leave dict_initoption null for
+-- several of the built-in dictionaries.
+-- NOTE(review): presumably dict_initoption is the argument handed to the
+-- dict_init function -- confirm in the C code.
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+-- lexize() is overloaded on (oid, text), (text, text) and (text); the last
+-- two are bound to the C symbols lexize_byname and lexize_bycurrent.
+-- set_curdict() likewise exists for an int and for a text argument.
+-- NOTE(review): presumably lexize(text) uses the dictionary chosen with
+-- set_curdict() -- confirm in the C code.
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+-- Each dictionary below is registered the same way: create its init and
+-- lexize C functions, then insert a pg_ts_dict row whose dict_init and
+-- dict_lexize are the pg_proc oids of those functions, looked up by name.
+-- The init functions are not declared strict; several rows pass a null
+-- dict_initoption.
+
+-- 'simple'
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+-- 'en_stem'; DATA_PATH in its init option is replaced by sed at build time
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+-- 'ru_stem'; shares snb_lexize with en_stem
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+-- 'ispell_template'
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+-- 'synonym'
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+-- One row per parser.  The row inserted by this script for the 'default'
+-- parser stores pg_proc oids in prs_start, prs_nexttoken, prs_end,
+-- prs_headline and prs_lextype.
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+-- token_type() returns rows of (tokid, alias, descr) and parse() returns
+-- rows of (tokid, token).  Both are overloaded: the text-first variants map
+-- to the *_byname C symbols, the variants without a leading selector
+-- argument to token_type_current / parse_current.
+-- NOTE(review): presumably the "current" parser is the one chosen with
+-- set_curprs() -- confirm in the C code.
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+-- The five prsd_* C functions are created first; the insert at the end
+-- registers them, by pg_proc oid, as the 'default' row of pg_ts_parser.
+-- The select list follows the table's column order, so prsd_headline is
+-- listed before prsd_lextype.
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+-- pg_ts_cfg: one row per configuration, naming its parser and a locale.
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+-- pg_ts_cfgmap: per configuration and token alias, an array of dictionary
+-- names.
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+-- predefined configurations; 'simple' gets a null locale
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+-- The I/O functions are created before the type that refers to them.
+-- tsvector is a variable-length type (INTERNALLENGTH = -1) with extended
+-- storage.
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+-- to_tsvector() is overloaded on (oid, text), (text, text) and (text),
+-- bound to to_tsvector, to_tsvector_name and to_tsvector_current.
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+-- tsvector || tsvector is implemented by concat()
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+-- The I/O functions are created before the type that refers to them.
+-- tsquery is a variable-length type (INTERNALLENGTH = -1); no STORAGE
+-- clause is given.
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+-- to_tsquery() is overloaded on (oid, text), (text, text) and (text),
+-- bound to to_tsquery, to_tsquery_name and to_tsquery_current.
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+-- The @@ operator is defined for both argument orders: exectsq() backs
+-- tsvector @@ tsquery and rexectsq() backs tsquery @@ tsvector.  The two
+-- operators name each other as commutator and both use the
+-- contsel / contjoinsel selectivity estimators.
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+-- Trigger function tsearch2(); it is not declared strict.
+-- NOTE(review): its arguments come from CREATE TRIGGER and are not visible
+-- in this script -- see the tsearch2 C function for what it expects.
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+-- rank() and rank_cd() each come in four signatures.  The variants without
+-- the leading float4[] (rank) or int4 (rank_cd) argument are bound to the
+-- C symbols rank_def and rank_cd_def, with and without the trailing int4.
+-- NOTE(review): the meaning of the float4[] and int4 arguments is not
+-- visible in this script -- confirm in rank.c.
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+-- headline() is overloaded by leading selector argument: oid -> headline,
+-- text -> headline_byname, none -> headline_current.  Each C symbol serves
+-- both the form with and the form without the trailing text argument.
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+-- gtsvector is the key type stored in the index (see the STORAGE clause of
+-- the operator class at the end of this section).
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+-- These seven functions fill support slots 1-7 of the operator class
+-- below; only gtsvector_penalty is declared strict.
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+-- Default GiST operator class for tsvector: a single operator,
+-- tsvector @@ tsquery, marked RECHECK.
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+-- statinfo is the row type returned by stat(): a word with its ndoc and
+-- nentry counters.  The C code looks this type up by the name "statinfo".
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+-- The aggregate-based interface below (tsstat type, ts_accum transition
+-- function, ts_accum_finish final function) is disabled; only stat(text)
+-- further down is installed.
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+-- stat(text): the argument is the text of an SQL query, which the C
+-- function ts_stat runs through SPI; the query must return a single
+-- tsvector column.
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+-- The C function behind reset_tsearch() emits the notice
+-- "TSearch cache cleaned".
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary (pg_ts_dict rows are identified by dict_name)
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of lexeme positions in the string and their lengths
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending position.
+ *
+ * Returns a negative value, zero or a positive value when a's position is
+ * less than, equal to or greater than b's.  Equal positions must compare
+ * as 0: returning 1 for them (as this function used to) makes both
+ * cmp(a,b) and cmp(b,a) positive, which is an inconsistent ordering for
+ * qsort().
+ */
+static int 
+comparePos(const void *a, const void *b) {
+   if ( ((WordEntryPos *) a)->pos == ((WordEntryPos *) b)->pos )
+       return 0;
+   return ( ((WordEntryPos *) a)->pos > ((WordEntryPos *) b)->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and remove duplicate positions in place.
+ * When the same position occurs more than once the largest weight is kept.
+ * Compaction stops early once MAXNUMPOS entries have been kept or the
+ * last kept position equals MAXENTRYPOS-1.
+ * Returns the number of entries left at the front of a[].
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   int4 last = 0;      /* index of the last kept entry */
+   int4 i;
+
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (i = 1; i < l; i++) {
+       if ( a[i].pos == a[last].pos ) {
+           /* duplicate position: only the heavier weight survives */
+           if ( a[i].weight > a[last].weight )
+               a[last].weight = a[i].weight;
+           continue;
+       }
+       last++;
+       a[last].pos = a[i].pos;
+       a[last].weight = a[i].weight;
+       if ( last >= MAXNUMPOS-1 || a[last].pos == MAXENTRYPOS-1 )
+           break;
+   }
+   return last + 1;
+}
+
+/* Buffer that compareentry() resolves entry offsets against (qsort has no context argument). */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN items: shorter lexemes sort first,
+ * lexemes of equal length are ordered by strncmp() over their bytes in
+ * BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntry *lhs = &((const WordEntryIN *) a)->entry;
+   const WordEntry *rhs = &((const WordEntryIN *) b)->entry;
+
+   if ( lhs->len != rhs->len )
+       return ( lhs->len > rhs->len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[lhs->pos], &BufferStr[rhs->pos], lhs->len);
+}
+
+/*
+ * Sort the l parsed entries in a[] and merge entries with identical
+ * lexemes in place, concatenating and de-duplicating their position lists.
+ *
+ * a         - array of parsed entries; entry.pos/entry.len index into buf
+ * l         - number of entries in a[]
+ * buf       - buffer holding the lexeme bytes
+ * outbuflen - output only: set to the number of bytes needed to store all
+ *             surviving lexemes (SHORTALIGNed) plus their position arrays,
+ *             including the count slot at the head of each array
+ *
+ * Returns the number of unique entries left at the front of a[].
+ *
+ * The size is computed from scratch.  It used to be added on top of
+ * whatever the caller had in *outbuflen and did not account for the count
+ * slot of each position array, so the result was only large enough when
+ * the caller's initial value happened to cover the difference.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   *outbuflen = 0;
+   if (l <= 0)
+       return 0;
+
+   res = a;
+   if (l == 1) {
+       *outbuflen = SHORTALIGN(a->entry.len);
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen += (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
+       }
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;        /* context for compareentry() */
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* new lexeme: finish accounting for the previous one, then keep this one */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* same lexeme again: fold its positions into the kept entry */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* kept entry had no positions: take over the duplicate's array */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+   /* account for the last kept entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/* States of the gettoken_tsvector() state machine (stored in state->state). */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Double the size of state->word when fewer than two bytes remain after
+ * state->curpos, keeping curpos at the same offset in the reallocated
+ * buffer.  Expects a variable named "state" in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Read the next lexeme from state->prsbuf.
+ *
+ * The lexeme bytes are stored NUL-terminated in state->word, with
+ * state->curpos pointing at the terminator.  Unless state->oprisdelim is
+ * set, an optional ":pos[weight],pos[weight]..." suffix is parsed into a
+ * freshly palloc'd state->pos array whose first slot holds the number of
+ * positions (as uint16); state->alen is non-zero only when such an array
+ * was allocated.  When oprisdelim is set, ISOPERATOR characters and ':'
+ * end the lexeme.
+ *
+ * Returns 1 when a lexeme was read and 0 at end of input; malformed input
+ * is reported with elog(ERROR).
+ *
+ * The <ctype.h> classifiers are given (unsigned char) values: passing a
+ * negative plain char to them is undefined behaviour.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;   /* state to resume after an escaped character */
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           /* skipping blanks, looking for the first character of a lexeme */
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           /* character following a backslash is taken literally */
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           /* inside an unquoted lexeme */
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           /* inside a single-quoted lexeme */
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           /* just after the closing quote: ':' starts the position list */
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           /* at the first digit of a position number */
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           /* remaining digits of the number, an optional weight letter, or ',' */
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type: parse the C string argument with
+ * gettoken_tsvector(), merge duplicate lexemes with uniqueentry() and
+ * build the packed tsvector (entry array followed by the lexeme bytes,
+ * each optionally followed by its position array).
+ *
+ * Raises an error when a lexeme is MAXSTRLEN bytes or longer, or when a
+ * lexeme would start at an offset that does not fit the 20-bit
+ * WordEntry.pos field.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array and the scratch lexeme buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       /* pos is a 20-bit field, so MAXSTRPOS itself is already out of range */
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           /* take ownership of the position array built by the tokenizer */
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* empty value: no data area, do not pad it with the scratch size */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* count slot plus the positions themselves */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * SQL-callable: return the number of entries (the size field) of the
+ * tsvector argument.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before the detoasted copy (if any) is released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type.
+ *
+ * Each lexeme is written as 'text', entries separated by one space, and an
+ * entry with positions gets a ":pos[W],pos[W]..." suffix where W is A, B
+ * or C for weights 3, 2, 1 (weight 0 prints no letter).
+ *
+ * Both the quote and the backslash are escaped with a backslash, because
+ * gettoken_tsvector() treats a backslash as an escape character inside
+ * quoted lexemes; leaving it unescaped would make the output read back as
+ * a different value.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   /*
+    * Worst-case size: per entry two quotes, one separating space and every
+    * byte escaped; per position at most 5 digits, a weight letter and a
+    * comma; one ':' per entry with positions; and the final NUL.
+    */
+   lenbuf = 1;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += 2 + 1 + ptr[i].len*2;
+       if ( ptr[i].haspos )
+           lenbuf += 1 + 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'' || *curin == '\\')
+               *curout++ = '\\';
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD items: order by length, then by the bytes
+ * of the word, then by position.  Two items with the same word never
+ * compare equal; the one whose position is not greater sorts first.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int         bytecmp;
+
+   if (wa->len != wb->len)
+       return (wa->len > wb->len) ? 1 : -1;
+
+   bytecmp = strncmp(wa->word, wb->word, wb->len);
+   if ( bytecmp != 0 )
+       return bytecmp;
+
+   return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l parsed words in a[] and collapse repeated words in place.
+ *
+ * Every surviving word gets a palloc'd pos.apos array: slot 0 holds the
+ * number of positions, the following slots the positions (clamped with
+ * LIMITPOS).  The word string of each dropped duplicate is pfree'd.
+ * Positions stop being collected for a word once it holds MAXNUMPOS-1 of
+ * them or its last stored position is MAXENTRYPOS-1.
+ *
+ * Returns the number of unique words left at the front of a[].
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   int4        last = 0;   /* index of the word currently being accumulated */
+   int4        i;
+   int         firstpos;
+
+   /* a single word needs no sorting */
+   if (l > 1)
+       qsort((void *) a, l, sizeof(WORD), compareWORD);
+
+   /* read the scalar position before pos.apos is assigned */
+   firstpos = LIMITPOS(a->pos.pos);
+   a->alen = 2;
+   a->pos.apos = (uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0] = 1;
+   a->pos.apos[1] = firstpos;
+
+   for (i = 1; i < l; i++)
+   {
+       WORD   *cand = &a[i];
+       WORD   *keep = &a[last];
+
+       if (cand->len == keep->len &&
+           strncmp(cand->word, keep->word, keep->len) == 0)
+       {
+           /* repeated word: drop its string, remember its position if there is room */
+           pfree(cand->word);
+           if ( keep->pos.apos[0] < MAXNUMPOS-1 && keep->pos.apos[ keep->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               if ( keep->pos.apos[0]+1 >= keep->alen ) {
+                   keep->alen *= 2;
+                   keep->pos.apos = (uint16*)repalloc( keep->pos.apos, sizeof(uint16)*keep->alen );
+               }
+               keep->pos.apos[ keep->pos.apos[0]+1 ] = LIMITPOS(cand->pos.pos);
+               keep->pos.apos[0]++; 
+           }
+           continue;
+       }
+
+       /* new word: move it down to the next free slot and start its position list */
+       last++;
+       keep = &a[last];
+       keep->len = cand->len;
+       keep->word = cand->word;
+       firstpos = LIMITPOS(cand->pos.pos);
+       keep->alen = 2;
+       keep->pos.apos = (uint16*)palloc( sizeof(uint16)*keep->alen );
+       keep->pos.apos[0] = 1;
+       keep->pos.apos[1] = firstpos;
+   }
+
+   return last + 1;
+}
+
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are first de-duplicated with uniqueWORD(); each unique word
+ * is then stored with its position list, all positions getting weight 0.
+ * The function consumes prs->words: every word string, every position
+ * array and the words array itself are pfree'd.
+ *
+ * Raises an error when a lexeme would start at an offset that does not
+ * fit the 20-bit WordEntry.pos field.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   /* size of the data area: aligned lexemes plus count + positions */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /* pos is a 20-bit field, so MAXSTRPOS itself is already out of range */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* apos[0] is the number of positions, apos[1..] the positions */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * SQL-callable: parse the text argument (arg 1) with the configuration
+ * whose id is given as arg 0 and return the resulting tsvector.  Text
+ * that yields no words produces a tsvector with zero entries.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *src = PG_GETARG_TEXT_P(1);
+   PRSTEXT     prs;
+   tsvector       *result = NULL;
+   TSCfgInfo *cfg=findcfg(PG_GETARG_INT32(0)); 
+
+   /* start with room for 32 words */
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(src), VARSIZE(src) - VARHDRSZ);
+   PG_FREE_IF_COPY(src, 1);
+
+   if (prs.curwords == 0)
+   {
+       /* nothing parsed: hand back a header-only value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   }
+   else
+       result = makevalue(&prs);
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * SQL-callable: like to_tsvector(), but the configuration is given by
+ * name (arg 0) and resolved with name2id_cfg().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);   
+}
+
+/*
+ * SQL-callable: like to_tsvector(), but uses the configuration returned
+ * by get_currcfg(); the text to parse is arg 0.
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(result);   
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * text.  Returns the oid of the first such candidate reported by
+ * FuncnameGetCandidates(), or InvalidOid when there is none.  The
+ * candidate list is pfree'd node by node while it is walked.
+ */
+static Oid
+findFunc(char *fname) {
+   FuncCandidateList cand;
+   Oid found = InvalidOid;
+   List *names=makeList1(makeString(fname));
+
+   cand = FuncnameGetCandidates(names, 1);
+   freeList(names);
+
+   while ( cand != NULL ) {
+       FuncCandidateList next = cand->next;
+
+       /* remember only the first match, but keep going to free the list */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * Row-level BEFORE INSERT/UPDATE trigger function.  Trigger arguments:
+ * the first names the tsvector column to fill, the remaining ones name
+ * either character columns to index or a function taking text.  A
+ * function argument is not indexed itself; once seen, its oid is kept in
+ * funcoid and the function is applied to the value of every column
+ * argument processed after it (funcoid is never reset).
+ *
+ * The words of all listed columns are parsed with the current
+ * configuration and the resulting tsvector (an empty one when no words
+ * were found) replaces the tsvector column in the returned tuple.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   /* validate how the trigger was fired */
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* first argument: the column that receives the tsvector */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: must be the name of a function taking text */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user-supplied function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /* no detoasted copy was made: make the pfree test below skip txt */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when it differs from txt_toasted */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a tsvector with zero entries */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Descriptor of one lexeme inside a tsvector:
+ *   haspos - set when a position array follows the lexeme bytes
+ *   len    - length of the lexeme in bytes
+ *   pos    - offset of the lexeme from the start of the data area (STRPTR)
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/* One occurrence of a lexeme: its position and a 2-bit weight. */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+/* clamp a position so that it fits the 14-bit pos field */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * On-disk value: total length, number of entries, then in data[] the
+ * WordEntry array followed by the lexeme bytes; a lexeme with haspos set
+ * is followed (after SHORTALIGN) by a uint16 count and that many
+ * WordEntryPos items.
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/* Macro arguments are parenthesized so that expressions can be passed safely. */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* address of the uint16 position count that follows entry e's lexeme */
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+/* number of positions stored for entry e (0 when it has none) */
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+/* first WordEntryPos of entry e, just past the count */
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Entry as collected while parsing input: the descriptor plus a separately
+ * allocated position array whose first slot holds the position count.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/* State of the gettoken_tsvector() tokenizer. */
+typedef struct
+{
+   char       *prsbuf;     /* current read position in the input string */
+   char       *word;       /* buffer receiving the current lexeme */
+   char       *curpos;     /* write position inside word */
+   int4        len;        /* allocated size of word */
+   int4        state;      /* current state of the state machine */
+   int4        alen;       /* allocated slots in pos, 0 when none */
+   WordEntryPos    *pos;   /* positions of the current lexeme */
+   bool        oprisdelim; /* treat operator characters as delimiters */
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * SQL-callable: return a copy of the tsvector argument with all position
+ * information removed (every entry gets haspos = 0 and only the lexeme
+ * bytes are copied).
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* data area size: the aligned lexemes only */
+   for(i=0;i<in->size;i++) 
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * SQL-callable: return a copy of the tsvector argument in which every
+ * position carries the weight named by the char argument:
+ * 'a' -> 3, 'b' -> 2, 'c' -> 1, 'd' -> 0 (case-insensitive).
+ * Any other character raises "Unknown weight".
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* tolower() is only defined for unsigned char values (and EOF) */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;    
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two lexemes that may live in different tsvectors: a is resolved
+ * against data area ptra, b against ptrb.  Shorter lexemes sort first,
+ * equal-length ones by strncmp() of their bytes.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of entry srcptr (in src) to the position array of
+ * entry destptr (in dest), shifting each copied position by maxpos and
+ * clamping it with LIMITPOS.  When destptr has no positions yet its count
+ * is started at zero; haspos is set on destptr if anything was appended.
+ *
+ * Returns the number of positions appended.
+ *
+ * NOTE(review): the loop header was truncated in this copy of the file
+ * (it ended at "i").  It is restored here with the same limits that
+ * uniquePos() applies - at most MAXNUMPOS positions and nothing after a
+ * position equal to MAXENTRYPOS-1 - confirm against the upstream source.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen < MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ) ;i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+
+/*
+ * SQL-callable: concatenate two tsvectors.
+ *
+ * The entries of both inputs are merged in compareEntry() order.
+ * Positions coming from the first argument are copied unchanged;
+ * positions coming from the second argument are appended through
+ * add_pos(), which shifts them by the largest position found in the first
+ * argument.  A lexeme present in both inputs produces a single entry
+ * carrying the positions of both.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* allocate for the case that no lexeme is shared; shrunk at the end */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   /* the data area starts after the provisional (maximum) entry array */
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* copy count and positions verbatim */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* shifted copy; drop haspos again if nothing could be added */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one entry, positions of in1 then in2 */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count slot is already there, only the added positions take space */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* leftover entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* leftover entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Fix up the header with the real entry count; STRPTR(out) depends on
+    * size, so the data area is moved down when entries were merged.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+--Be careful !!!
+--This script drops all indexes, triggers and columns that depend on the
+--types defined in tsearch2.sql
+
+
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type, indexed by the token id
+ * from deflex.h.  Ids start at 1, so entry 0 is an empty placeholder.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed by the token id from deflex.h.
+ * Ids start at 1, so entry 0 is an empty placeholder.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/* remember: keep LASTNUM equal to the highest token id defined below */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Interface to the flex-generated word scanner (parser.l).
+ *
+ * NOTE(review): token and tokenlen are declared here without "extern", so
+ * every file including this header gets its own tentative definition and the
+ * build relies on the linker merging them.  Consider "extern" here plus one
+ * definition in parser.l.
+ */
+char      *token;      /* text of the token found by the last tsearch2_yylex() call */
+int            tokenlen;   /* length of that token in bytes */
+int            tsearch2_yylex(void);   /* scan the next token, return its type id */
+void       start_parse_str(char *, int);   /* start scanning a buffer of the given length */
+void       start_parse_fh(FILE *, int);    /* start scanning a file, with a byte limit */
+void       end_parse(void);    /* release the scanner buffer */
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/* Avoid exit() on fatal scanner errors */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* postgres allocation function */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the current token */
+char *s     = NULL;  /* private copy used to return a WHOLE hyphenated word or URL */
+
+YY_BUFFER_STATE buf = NULL; /* current scanner buffer; needed for parsing from a string */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next read from the filehandle */
+
+/* redefine macro for read limited length */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: free the saved copy of a composite token (s)
+ * and delete the current flex buffer.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Begin scanning the first limit bytes of str.  Any buffer left over from
+ * a previous parse is released first.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL )
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+/*
+ * Begin scanning from an open file handle.  limit is the number of bytes
+ * that may be read; 0 is mapped to -1, which YY_INPUT treats as unlimited.
+ * Any buffer left over from a previous parse is released first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL )
+       end_parse();
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Load the parser with the given oid from pg_ts_parser into *prs.
+ *
+ * *prs is zeroed first.  On success the FmgrInfo entries for the parser's
+ * start, nexttoken, end and headline functions are set up in
+ * TopMemoryContext, prs->lextype receives the oid of the lextype function
+ * and prs->prs_id is set to id.  Problems (plan cannot be prepared, query
+ * fails, no such parser) are reported through ts_error(ERROR, ...).
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       /* prepare the lookup query once and keep the saved plan for later calls */
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+
+       /* result columns, in order: start, nexttoken, end, lextype, headline */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       /* the lextype function is kept as a bare oid, not as FmgrInfo */
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else
+       /* Oid is unsigned: print it with %u so large oids do not show up negative */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/* Per-backend cache of parsers that have already been loaded */
+typedef struct {
+   WParserInfo *last_prs;  /* entry returned by the most recent lookup, or NULL */
+   int     len;            /* number of entries in use in list */
+   int     reallen;        /* number of entries allocated for list */
+   WParserInfo *list;      /* loaded parsers, kept sorted by prs_id (see findprs) */
+   SNMap       name2id_map;    /* parser name -> oid map filled by name2id_prs */
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/* Throw away the whole parser cache: the name->id map and the parser array. */
+void
+reset_prs(void)
+{
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL )
+       free(PList.list);
+
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort/bsearch comparator that orders WParserInfo entries by prs_id.
+ *
+ * Oid is an unsigned type, so the ids are compared explicitly.  Returning
+ * their difference converted to int yields the wrong sign once the two ids
+ * are more than INT_MAX apart, which would corrupt the sort order.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached WParserInfo for parser id, loading it with init_prs()
+ * on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search of the
+ * sorted cache, then a fresh load into a new slot (the array is doubled
+ * when full, starting at 16 entries).  The returned pointer refers into
+ * PList.list and can move when a later call adds another parser.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo key;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* already used prs */
+   key.prs_id=id;
+   if ( PList.len != 0 ) {
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* last chance: load the parser into a new slot, growing the array if needed */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp )
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /*
+    * Do not publish the new slot before it is fully initialized.  init_prs()
+    * can bail out through ts_error(ERROR, ...); last_prs must not be left
+    * pointing at a zeroed entry that lies outside the first PList.len
+    * elements, or a later lookup could hand it out.
+    */
+   PList.last_prs=NULL;
+   init_prs(id, &(PList.list[PList.len]));
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+
+   /* qsort changed the order, so locate the new entry again */
+   PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return PList.last_prs;
+}
+
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * The name->id map in PList is consulted first; on a miss the catalog is
+ * queried through SPI and the result is added to the map.  An unknown name
+ * is reported through ts_error(ERROR, ...).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   Oid id=findSNMap_t( &(PList.name2id_map), name );
+   
+   /* a non-zero id means the name was found in the map */
+   if ( id ) 
+       return id;
+   
+
+   SPI_connect();
+   if ( !plan_name2id ) {
+       /* prepare the lookup query once and keep the saved plan for later calls */
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 )
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   SPI_finish();
+   /* remember the mapping for the next lookup */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* Cross-call state of the token_type* set-returning functions */
+typedef struct {
+   int     cur;        /* index of the next entry of list to return */
+   LexDescr    *list;  /* lexeme type descriptions, ended by an entry with lexid == 0 */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type* set-returning functions.
+ *
+ * Calls the lextype function of parser prsid to get its list of lexeme
+ * types and stores the list, with a cursor, in funcctx->user_fctx.  The
+ * state and the tuple machinery for the "tokentype" row type are created
+ * in funcctx->multi_call_memory_ctx.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   TypeStorage     *st;
+   WParserInfo *prs = findprs(prsid); 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->cur=0;
+   /* the lextype function returns a pointer to a LexDescr array */
+   st->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
+   );
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Produce the next row for the token_type* functions.
+ *
+ * Returns a tuple datum built from the current LexDescr (id, alias,
+ * description) and advances the cursor.  When an entry with lexid == 0 is
+ * reached, or there is no list, the state is freed and (Datum)0 is
+ * returned, which the callers take as the end of the set.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage     *st;
+
+   st=(TypeStorage*)funcctx->user_fctx;
+   if (  st->list && st->list[st->cur].lexid ) {
+       Datum result;
+       char* values[3];
+       char    txtid[16];
+       HeapTuple    tuple;
+
+       /* the tuple is built from C strings, so the id is formatted as text */
+       values[0]=txtid;
+       sprintf(txtid,"%d",st->list[st->cur].lexid);
+       values[1]=st->list[st->cur].alias;
+       values[2]=st->list[st->cur].descr;
+
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       /* alias and descr of this entry are no longer needed once the tuple is built */
+       pfree(values[1]);
+       pfree(values[2]);
+       st->cur++;
+       return result;
+   } else {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+   }
+   return (Datum)0;
+}
+
+/*
+ * Set-returning function: lists the lexeme types of the parser whose oid
+ * is passed as argument 0, one row per call.
+ */
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) { 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call() means there are no more rows */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Set-returning function: lists the lexeme types of the parser whose name
+ * is passed as text argument 0, one row per call.
+ */
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* the name is resolved to a parser oid only once, on the first call */
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call() means there are no more rows */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Set-returning function: lists the lexeme types of the current parser,
+ * one row per call.  If no parser has been selected yet, the parser named
+ * "default" becomes the current one.
+ */
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from process_call() means there are no more rows */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * Make the parser whose oid is passed as argument 0 the current parser.
+ * The parser is looked up (and loaded if necessary) before the switch.
+ */
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        Oid prsid = PG_GETARG_OID(0);
+
+        findprs(prsid);
+        current_parser_id = prsid;
+
+        PG_RETURN_VOID();
+}
+
+/*
+ * Make the parser whose name is passed as text argument 0 the current
+ * parser: the name is resolved to an oid and handed to set_curprs().
+ */
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+    
+        DirectFunctionCall1(
+                set_curprs,
+                ObjectIdGetDatum( name2id_prs(name) )
+        );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+/* One parsed token: its type id and a NUL-terminated copy of its text */
+typedef struct {
+   int type;
+   char    *lexem;
+} LexemEntry;
+
+/* Cross-call state of the parse* set-returning functions */
+typedef struct {
+   int cur;            /* index of the next entry of list to return */
+   int len;            /* allocated size while parsing, number of tokens afterwards */
+   LexemEntry  *list;  /* tokens collected by prs_setup_firstcall */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse* set-returning functions.
+ *
+ * Runs parser prsid over the whole of txt, collects every token (type and
+ * a copy of its text) into a PrsStorage and stores that in
+ * funcctx->user_fctx.  The state and the tuple machinery for the "tokenout"
+ * row type are created in funcctx->multi_call_memory_ctx.
+ *
+ * NOTE(review): prsid is declared int although every caller passes an Oid
+ * and it is handed straight to findprs(Oid) -- consider changing the type.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, int prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text payload (varlena header excluded) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* fetch tokens until the parser returns type 0 */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* double the array when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* keep a NUL-terminated private copy of the token text */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the token count and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Produce the next row for the parse* functions.
+ *
+ * Returns a tuple datum built from the current token (type id, text) and
+ * advances the cursor.  When all tokens have been returned the state is
+ * freed and (Datum)0 is returned, which the callers take as the end of
+ * the set.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *st;
+
+   st=(PrsStorage*)funcctx->user_fctx;
+   if (  st->cur < st->len ) {
+       Datum result;
+       char* values[2];
+       char    tid[16];
+       HeapTuple    tuple;
+
+       /* the tuple is built from C strings, so the type id is formatted as text */
+       values[0]=tid;
+       sprintf(tid,"%d",st->list[st->cur].type);
+       values[1]=st->list[st->cur].lexem;
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       /* the token copy is no longer needed once the tuple is built */
+       pfree(values[1]);
+       st->cur++;
+       return result;
+   } else {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+   }
+   return (Datum)0;
+}
+
+           
+
+/*
+ * Set-returning function: parses text argument 1 with the parser whose oid
+ * is argument 0 and returns the tokens, one row per call.
+ */
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* the whole text is tokenized here; later calls only hand out rows */
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from prs_process_call() means there are no more rows */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Set-returning function: parses text argument 1 with the parser whose
+ * name is text argument 0 and returns the tokens, one row per call.
+ */
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* the whole text is tokenized here; later calls only hand out rows */
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from prs_process_call() means there are no more rows */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * Set-returning function: parses text argument 0 with the current parser
+ * and returns the tokens, one row per call.  If no parser has been selected
+ * yet, the parser named "default" becomes the current one.
+ */
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       /* the whole text is tokenized here; later calls only hand out rows */
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* (Datum)0 from prs_process_call() means there are no more rows */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Build a headline for a document.
+ *
+ * Arguments: 0 - oid of the configuration, 1 - document text, 2 - query,
+ * 3 - optional options text (may be absent or a NULL pointer).  The text is
+ * parsed with hlparsetext(), the headline function of the configuration's
+ * parser marks up the words, and genhl() produces the resulting text.
+ */
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   /* the *_byname/_current wrappers pass a NULL pointer when no options were given */
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /* NOTE(review): assumes the parser's headline function always sets startsel/stopsel -- confirm */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+/*
+ * Like headline(), but the configuration is given by name (text
+ * argument 0) instead of by oid.  The remaining arguments are passed
+ * through; a missing options argument is passed as a NULL pointer.
+ */
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg=PG_GETARG_TEXT_P(0);
+
+   Datum out=DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);   
+}
+
+/*
+ * Like headline(), but uses the current configuration.  Arguments:
+ * 0 - document text, 1 - query, 2 - optional options text (passed on as a
+ * NULL pointer when absent).
+ */
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum options;
+   Datum out;
+
+   if ( PG_NARGS()>2 )
+       options = PG_GETARG_DATUM(2);
+   else
+       options = PointerGetDatum(NULL);
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       options
+   );
+
+   PG_RETURN_DATUM(out);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* Cached description of one pg_ts_parser row, filled in by init_prs() */
+typedef struct {
+   Oid prs_id;                 /* oid of the pg_ts_parser row */
+   FmgrInfo start_info;        /* prs_start function */
+   FmgrInfo getlexeme_info;    /* prs_nexttoken function */
+   FmgrInfo end_info;          /* prs_end function */
+   FmgrInfo headline_info;     /* prs_headline function */
+   Oid lextype;                /* oid of the prs_lextype function */
+   void *prs;                  /* parser state returned by the start function */
+} WParserInfo;
+
+void init_prs(Oid id, WParserInfo *prs);   /* load parser id from the catalog into *prs */
+WParserInfo* findprs(Oid id);              /* cached lookup, loads the parser on first use */
+Oid name2id_prs(text *name);               /* parser name -> oid */
+void   reset_prs(void);                    /* drop the parser cache */
+
+
+/* Description of one lexeme type; a list of these ends with lexid == 0 */
+typedef struct {
+   int lexid;
+   char    *alias;
+   char    *descr;
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * Return a palloc'd array of LexDescr describing lexeme types
+ * 1..LASTNUM of the default parser.  Alias and description strings are
+ * pstrdup'ed copies of tok_alias[] / lex_descr[].  The array has one
+ * extra trailing entry whose lexid is 0 to mark its end (the other
+ * members of that entry are left unset).
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *table=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   LexDescr *slot=table;
+   int id;
+
+   for(id=1;id<=LASTNUM;id++,slot++) {
+       slot->lexid = id;
+       slot->alias = pstrdup(tok_alias[id]);
+       slot->descr = pstrdup(lex_descr[id]);
+   }
+
+   /* slot now addresses the extra entry after the last real one */
+   slot->lexid=0;
+
+   PG_RETURN_POINTER(table);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+/*
+ * Parser "start" callback: hand the text buffer (argument 0) and its
+ * length (argument 1) to start_parse_str().  Always returns a NULL
+ * pointer datum.
+ */
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char *buf=(char*)PG_GETARG_POINTER(0);
+   int buflen=PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+/*
+ * Parser "get lexeme" callback: run the scanner once via
+ * tsearch2_yylex() and report the lexeme it produced.
+ *
+ * Argument 0 is not used by this parser.  Argument 1 (char **) receives
+ * the global `token` pointer and argument 2 (int *) receives the global
+ * `tokenlen`.  The return value is the scanner's result code.
+ */
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **lexeme=(char**)PG_GETARG_POINTER(1);
+   int *lexemelen=(int*)PG_GETARG_POINTER(2);
+   int lextype;
+
+   lextype=tsearch2_yylex();
+
+   *lexeme = token;
+   *lexemelen = tokenlen;
+   PG_RETURN_INT32(lextype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+/*
+ * Parser "end" callback: finish the current parse by calling
+ * end_parse().  Argument 0 is not used by this parser (see the
+ * commented-out line below); nothing is returned.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse();
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class predicates for the headline code below.  Each takes a
+ * lexeme type id (the value stored in HLWORD.type).  The literal numbers
+ * presumably match the type constants of the default parser in
+ * wordparser/deflex.h -- TODO confirm and consider named constants.
+ *
+ * NONWORDTOKEN: types that do not count as a word when measuring length.
+ * NOENDTOKEN:   types a headline fragment should not end on.
+ * HLIDIGNORE:   types whose words get the `replace` flag in the headline.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/*
+ * A window of headline words: `len` entries starting at `words`.
+ * Passed as the opaque check value to checkcondition_HL().
+ */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * TS_execute() callback: a query operand `val` is considered present
+ * when some word of the window in checkval points at it through its
+ * `item` member.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window=(hlCheck*)checkval;
+   int n=0;
+
+   while ( n<window->len ) {
+       if ( window->words[n].item==val )
+           return true;
+       n++;
+   }
+   return false;
+}
+
+
+/*
+ * Find the next "cover": a span of prs->words that satisfies the query.
+ *
+ * On entry *p is the word index to start searching from.  On success
+ * the function returns true with *p / *q set to the first / last word
+ * index of the span; otherwise it returns false.
+ *
+ * Step 1: for every VAL operand of the query take its first occurrence
+ *         at or after the start position; *q becomes the largest such
+ *         index.  (*q == 0 is treated as "nothing found".)
+ * Step 2: for every VAL operand take its last occurrence in
+ *         [start, *q]; *p becomes the smallest such index.
+ * Step 3: evaluate the query over words [*p, *q]; if it does not hold,
+ *         advance *p by one and search again.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* step 1: right edge */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q==0 )
+       return false;
+
+   /* step 2: left edge, scanning backwards from the right edge */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   /* step 3: does the candidate span really satisfy the query? */
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+/*
+ * Default parser's headline callback: choose which words of the parsed
+ * text form the headline and flag them in place.
+ *
+ * Arguments:
+ *   0  HLPRSTEXT *  parsed text; its words[] flags, startsel/stopsel
+ *                   and their lengths are filled in here
+ *   1  text *       option string or NULL; recognised keys (compared
+ *                   case-insensitively) are MaxWords, MinWords,
+ *                   ShortWord, StartSel and StopSel
+ *   2  QUERYTYPE *  the query whose operands should be highlighted
+ *
+ * Raises an error unless 0 < MinWords < MaxWords and ShortWord >= 0
+ * (checked only when an option string is given).
+ *
+ * Returns the same HLPRSTEXT pointer it was given.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults; overridable from opt, as are the start and stop tags */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   /* examine every cover of the query and remember the best fragment */
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* a better fragment with a good end was already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words ; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this fragment if it is the first one, if it holds more
+        * distinct query words and ends well, or if it ends well while
+        * the current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   /* no cover at all: fall back to the first min_words words of the text */
+   if ( bestlen<0 ) {
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen fragment */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   if (!prs->startsel)
+       prs->startsel=pstrdup("");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+           dw[cur].w=STRPTR(txt)+pptr[i].pos;  
+           dw[cur].len=pptr[i].len;    
+           dw[cur].pos=posdata[j].pos;
+           cur++;
+       }
+       len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+   }
+   qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+   while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+       dwptr=dw+olddwpos;
+       while(dwptr->pos < p && dwptr-dw
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr->pos < q+1 && dwptr-dw
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6


--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/*
+ * Node of the pointer-linked ("normal") form of a query tree.
+ * valnode points at the ITEM this node was built from.  As built by
+ * maketree(), only operator nodes have children, and a '!' operator
+ * has a right child only.
+ */
+typedef struct NODE
+{
+   struct NODE *left;
+   struct NODE *right;
+   ITEM       *valnode;
+}  NODE;
+
+/*
+ * Build the pointer-linked tree for the plain (array) form of a query
+ * starting at `in`.  For an operator item the right operand is the next
+ * item (in + 1); for operators other than '!' the left operand is at
+ * in + in->left.  Nodes are palloc'd and keep a pointer to their ITEM.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *result = (NODE *) palloc(sizeof(NODE));
+
+   result->left = NULL;
+   result->right = NULL;
+   result->valnode = in;
+
+   /* anything that is not an operator is a leaf */
+   if (in->type != OPR)
+       return result;
+
+   result->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       result->left = maketree(in + in->left);
+
+   return result;
+}
+
+/*
+ * Output buffer used while flattening a tree: ptr is the ITEM array,
+ * len its allocated size in items and cur the number of items written.
+ */
+typedef struct
+{
+   ITEM       *ptr;
+   int4        len;
+   int4        cur;
+}  PLAINTREE;
+
+/*
+ * Append `node` and its subtree to the plain array in `state`, then
+ * pfree the node.  The array is doubled when full.  For '!' the `left`
+ * offset is 1; for binary operators the right subtree is written first
+ * and `left` is the distance from the operator to the left subtree.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   ITEM       *src = node->valnode;
+   int4        self;
+
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+
+   /* copy this item into its slot and reserve the slot */
+   self = state->cur;
+   memcpy((void *) &(state->ptr[self]), (void *) src, sizeof(ITEM));
+   state->cur++;
+
+   if (src->type != VAL)
+   {
+       if (src->val == (int4) '!')
+       {
+           state->ptr[self].left = 1;
+           plainnode(state, node->right);
+       }
+       else
+       {
+           plainnode(state, node->right);
+           /* index via state->ptr again: the array may have moved */
+           state->ptr[self].left = state->cur - self;
+           plainnode(state, node->left);
+       }
+   }
+
+   pfree(node);
+}
+
+/*
+ * Flatten the pointer-linked tree rooted at `root` into a freshly
+ * palloc'd plain ITEM array.  The number of items is stored in *len.
+ * Returns NULL (and *len = 0) when root is NULL or is neither a value
+ * nor an operator node.  The tree's nodes are freed by plainnode().
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   out;
+
+   out.ptr = NULL;
+   out.cur = 0;
+   out.len = 16;
+
+   if (root != NULL &&
+       (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       out.ptr = (ITEM *) palloc(out.len * sizeof(ITEM));
+       plainnode(&out, root);
+   }
+
+   *len = out.cur;
+   return out.ptr;
+}
+
+/*
+ * Recursively pfree a tree; a NULL node is accepted and ignored.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the NULL check above also covers missing children */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Remove '!' operators from the tree.
+ *
+ * A '!' subtree is dropped entirely (NULL is returned for it), i.e. it
+ * is treated as "always true".  This form is useful for debugging and
+ * is otherwise used when searching an index.
+ *
+ *   value    -> returned unchanged
+ *   !x       -> freed, NULL
+ *   a | b    -> NULL if either side became NULL (whole node freed)
+ *   a & b    -> the surviving side if one side became NULL, NULL if
+ *               both did
+ *
+ * Returns the cleaned subtree or NULL; nodes that are dropped are freed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   /* operator & or | */
+   if (node->valnode->val == (int4) '|')
+   {
+       /* short-circuit: if the left side vanished the right side is not cleaned, just freed */
+       if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+           (node->right = clean_NOT_intree(node->right)) == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_NOT_intree(node->left);
+       node->right = clean_NOT_intree(node->right);
+       if (node->left == NULL && node->right == NULL)
+       {
+           pfree(node);
+           res = NULL;
+       }
+       else if (node->left == NULL)
+       {
+           /* only the operator node itself is freed; its child is handed up */
+           res = node->right;
+           pfree(node);
+       }
+       else if (node->right == NULL)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a new plain query array equal to `ptr` with all '!' subtrees
+ * removed (see clean_NOT_intree).  *len receives the item count; the
+ * result is NULL with *len = 0 when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree = maketree(ptr);
+   NODE       *cleaned = clean_NOT_intree(tree);
+
+   return plaintree(cleaned, len);
+}
+
+/* Constant truth value of a removed subtree, as reported via *result */
+#define V_UNKNOWN  0
+#define V_TRUE     1
+#define V_FALSE        2
+
+/*
+ * Remove from the query tree the values that are always present in any
+ * text (stopwords, represented by VALTRUE nodes) and simplify the
+ * operators above them.
+ *
+ * Returns the cleaned subtree, or NULL when the subtree collapsed to a
+ * constant; in that case *result is set to V_TRUE or V_FALSE.  When a
+ * non-NULL subtree is returned *result is left as the caller set it.
+ *
+ *   value      -> returned unchanged
+ *   VALTRUE    -> freed, NULL, V_TRUE
+ *   !x         -> if x collapsed: NULL with the inverted constant
+ *   a | b      -> V_TRUE if either side is V_TRUE; V_FALSE if both are
+ *                 V_FALSE; otherwise the side that is not V_FALSE
+ *   a & b      -> V_FALSE if either side is V_FALSE; V_TRUE if both are
+ *                 V_TRUE; otherwise the side that is not V_TRUE
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           /* operand collapsed to a constant: negate it */
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE)
+       {
+           /* freetree also releases the side that did not collapse */
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       /* any other operator is handled as '&' */
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE)
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE)
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node;
+}
+
+/*
+ * Return a new plain query array equal to `ptr` with stopword
+ * placeholders removed (see clean_fakeval_intree); *len receives the
+ * item count.  If the whole query collapses to a constant a NOTICE is
+ * emitted, *len is set to 0 and NULL is returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *cleaned = clean_fakeval_intree(maketree(ptr), &verdict);
+
+   if (verdict == V_UNKNOWN)
+       return plaintree(cleaned, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}


diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a


--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/* Copy of the plain query at ptr with all '!' subtrees removed; item count in *len */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+/* Copy of the plain query at ptr with stopword placeholders removed; NULL and *len = 0 if nothing is left */
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif


diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad


--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/*
+ * qsort/bsearch comparator: order SNMapEntry elements by key (strcmp).
+ */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   SNMapEntry *lhs=(SNMapEntry*)a;
+   SNMapEntry *rhs=(SNMapEntry*)b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add (key, value) to the map.  The key is copied with strdup(); the
+ * entry array is malloc-managed and grows by doubling (initially 16
+ * entries).  The array is re-sorted after each insertion so that
+ * findSNMap() can use bsearch().  Raises ERROR on allocation failure.
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   if (map->len>=map->reallen) {
+       int newlen = (map->reallen) ? 2*map->reallen : 16;
+       SNMapEntry *grown=(SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newlen);
+
+       if ( !grown )
+           elog(ERROR, "No memory");
+       map->list=grown;
+       map->reallen=newlen;
+   }
+
+   slot=&( map->list[ map->len ] );
+   slot->key = strdup(key);
+   if ( ! slot->key )
+       elog(ERROR, "No memory");
+   slot->value=value;
+   map->len++;
+
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Same as addSNMap() but the key is given as a text value; it is
+ * converted with text2char() and the temporary string is freed.
+ */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey=text2char( key );
+
+   addSNMap(map, ckey, value);
+   pfree(ckey);
+}
+
+/*
+ * Look up key in the map by binary search.  Returns the stored value,
+ * or 0 when the map is empty or the key is not present.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe;
+   SNMapEntry *hit;
+
+   if ( map->len==0 || !map->list )
+       return 0;
+
+   probe.key=key;
+   probe.value=0;
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( !hit )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Same as findSNMap() but the key is given as a text value; it is
+ * converted with text2char() and the temporary string is freed.
+ * Returns the stored value, or 0 when the key is not present.
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* was int: keep the lookup result in its own (unsigned) type */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release every key and the entry array, then zero the SNMap so that it
+ * can be reused as an empty map.
+ */
+void freeSNMap( SNMap *map ) {
+   SNMapEntry *cur=map->list;
+   int left=map->len;
+
+   if ( cur ) {
+       for(;left;left--,cur++) {
+           if ( cur->key )
+               free(cur->key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+


diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601


--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One map entry: a string key (owned by the map) and its Oid value */
+typedef struct {
+   char    *key;
+   Oid value;
+} SNMapEntry;
+
+/*
+ * Map from string to Oid, stored as an array of entries.
+ * len is the number of entries in use, reallen the number allocated.
+ * A zero-filled SNMap is a valid empty map (that is the state
+ * freeSNMap() in snmap.c leaves behind).
+ */
+typedef struct {
+   int len;
+   int reallen;
+   SNMapEntry  *list;
+} SNMap;
+
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+void freeSNMap( SNMap *map );
+
+#endif


diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a stemmer environment with S_size string slots, I_size
+ * integer slots and B_size boolean (symbol) slots, all zero-initialised,
+ * plus the working string z->p.
+ *
+ * Returns the new environment, or NULL if a calloc() fails; in that case
+ * everything allocated so far is released.  Release a successfully
+ * created environment with SN_close_env().
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    /* each *_size is recorded only after its array exists, so
+       SN_close_env() frees exactly what was allocated */
+    SN_close_env(z);
+    return NULL;
+}
+
+/*
+ * Release an environment created by SN_create_env(): every string slot,
+ * the three slot arrays, the working string and the structure itself.
+ * An array is freed only when its recorded size is non-zero.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    int i;
+
+    if (z->S_size)
+    {
+        for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
+        free(z->S);
+    }
+    if (z->I_size) free(z->I);
+    if (z->B_size) free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Load a new word into the environment: replace_s() is asked to replace
+ * positions 0..z->l of the working string with the `size` symbols at
+ * `s`, and the cursor z->c is reset to 0.
+ * NOTE(review): replace_s() is defined outside this file; presumably it
+ * also updates z->l to the new length -- confirm in utilities.c.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+


diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1


--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/*
+ * Stemmer environment (one per stemming context).
+ *
+ *   p                 working string, created with create_s()
+ *   c, a, l, lb       integer state of the stemmer; SN_set_current()
+ *                     resets c to 0 and uses l as the current end --
+ *                     presumably cursor / limit positions, TODO confirm
+ *                     meaning of a and lb
+ *   bra, ket          integer marks set by the generated stemmers
+ *   S_size/I_size/B_size  number of elements in S / I / B
+ *   S                 array of strings, each created with create_s()
+ *   I                 array of ints
+ *   B                 array of symbols used as flags by generated code
+ */
+struct SN_env {
+    symbol * p;
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;
+    symbol * * S;
+    int * I;
+    symbol * B;
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+


diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+static symbol s_0[] = { 'y' }; /* r_prelude: 'y' tested at the entry cursor */
+static symbol s_1[] = { 'Y' }; /* r_prelude: replacement for that 'y' */
+static symbol s_2[] = { 'y' }; /* r_prelude: 'y' tested right after a g_v symbol */
+static symbol s_3[] = { 'Y' }; /* r_prelude: replacement for that 'y' */
+static symbol s_4[] = { 's', 's' }; /* r_Step_1a case 1 replacement */
+static symbol s_5[] = { 'i', 'e' }; /* r_Step_1a case 2 replacement, first alternative */
+static symbol s_6[] = { 'i' }; /* r_Step_1a case 2 replacement, fallback */
+static symbol s_7[] = { 'e', 'e' }; /* r_Step_1b case 1 replacement */
+static symbol s_8[] = { 'e' }; /* r_Step_1b inner case 1 insertion */
+static symbol s_9[] = { 'e' }; /* r_Step_1b inner case 3 insertion */
+static symbol s_10[] = { 'y' }; /* r_Step_1c: first accepted ending */
+static symbol s_11[] = { 'Y' }; /* r_Step_1c: alternative ending */
+static symbol s_12[] = { 'i' }; /* r_Step_1c replacement */
+static symbol s_13[] = { 't', 'i', 'o', 'n' }; /* r_Step_2 case 1 replacement */
+static symbol s_14[] = { 'e', 'n', 'c', 'e' }; /* r_Step_2 case 2 replacement */
+static symbol s_15[] = { 'a', 'n', 'c', 'e' }; /* r_Step_2 case 3 replacement */
+static symbol s_16[] = { 'a', 'b', 'l', 'e' }; /* r_Step_2 case 4 replacement */
+static symbol s_17[] = { 'e', 'n', 't' }; /* r_Step_2 case 5 replacement */
+static symbol s_18[] = { 'i', 'z', 'e' }; /* r_Step_2 case 6 replacement */
+static symbol s_19[] = { 'a', 't', 'e' }; /* r_Step_2 case 7 replacement */
+static symbol s_20[] = { 'a', 'l' }; /* r_Step_2 case 8 replacement */
+static symbol s_21[] = { 'f', 'u', 'l' }; /* r_Step_2 case 9 replacement */
+static symbol s_22[] = { 'o', 'u', 's' }; /* r_Step_2 case 10 replacement */
+static symbol s_23[] = { 'i', 'v', 'e' }; /* r_Step_2 case 11 replacement */
+static symbol s_24[] = { 'b', 'l', 'e' }; /* r_Step_2 case 12 replacement */
+static symbol s_25[] = { 'l' }; /* r_Step_2 case 13: symbol required before the suffix */
+static symbol s_26[] = { 'o', 'g' }; /* r_Step_2 case 13 replacement */
+static symbol s_27[] = { 'f', 'u', 'l' }; /* r_Step_2 case 14 replacement */
+static symbol s_28[] = { 'l', 'e', 's', 's' }; /* r_Step_2 case 15 replacement */
+static symbol s_29[] = { 't', 'i', 'o', 'n' }; /* r_Step_3 case 1 replacement */
+static symbol s_30[] = { 'a', 't', 'e' }; /* r_Step_3 case 2 replacement */
+static symbol s_31[] = { 'a', 'l' }; /* r_Step_3 case 3 replacement */
+static symbol s_32[] = { 'i', 'c' }; /* r_Step_3 case 4 replacement */
+static symbol s_33[] = { 's' }; /* r_Step_4 case 2: first accepted preceding symbol */
+static symbol s_34[] = { 't' }; /* r_Step_4 case 2: alternative preceding symbol */
+static symbol s_35[] = { 'l' }; /* r_Step_5 case 2: symbol required before the suffix */
+static symbol s_36[] = { 's', 'k', 'i' }; /* r_exception1 case 1 replacement */
+static symbol s_37[] = { 's', 'k', 'y' }; /* r_exception1 case 2 replacement */
+static symbol s_38[] = { 'd', 'i', 'e' }; /* r_exception1 case 3 replacement */
+static symbol s_39[] = { 'l', 'i', 'e' }; /* r_exception1 case 4 replacement */
+static symbol s_40[] = { 't', 'i', 'e' }; /* r_exception1 case 5 replacement */
+static symbol s_41[] = { 'i', 'd', 'l' }; /* r_exception1 case 6 replacement */
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' }; /* r_exception1 case 7 replacement */
+static symbol s_43[] = { 'u', 'g', 'l', 'i' }; /* r_exception1 case 8 replacement */
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' }; /* r_exception1 case 9 replacement */
+static symbol s_45[] = { 'o', 'n', 'l', 'i' }; /* r_exception1 case 10 replacement */
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' }; /* r_exception1 case 11 replacement */
+static symbol s_47[] = { 'Y' }; /* r_postlude: symbol searched for */
+static symbol s_48[] = { 'y' }; /* r_postlude: its replacement */
+/* r_prelude: clear Y_found (B[0]), then rewrite 'y' as 'Y' when it sits at the entry cursor before a g_v symbol, or directly after a g_v symbol; sets Y_found on any rewrite. Always returns 1. */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0; /* need 'y' here (eq_s is assumed to step over it on success) */
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0; /* and a g_v symbol right after it */
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c; /* leave the 'do' block with the cursor where it started */
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c; /* shadows the outer c: start of this repetition */
+            while(1) { /* goto, line 26 */
+                int c = z->c; /* shadows again: candidate position being tried */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c; /* match found: cursor goes back to the candidate position */
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2; /* ran out of input: stop repeating */
+                z->c++; /* otherwise try one symbol further on */
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1: /* emitted by the generator; no goto in this function targets it */
+        z->c = c;
+    }
+    return 1;
+}
+/* r_mark_regions: compute the marks p1 (I[0]) and p2 (I[1]); both start out at the limit z->l and keep that value if the scan runs out of input. The entry cursor is restored; always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l; /* p1 defaults to the limit */
+    z->I[1] = z->l; /* p2 defaults to the limit */
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1; /* an a_0 entry matched at the cursor: p1 is taken right after it */
+        lab2:
+            z->c = c; /* second alternative: retry from the saved cursor */
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0; /* no g_v symbol left: give up, marks keep their current values */
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0; /* p1 is set but p2 stays at the limit */
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c; /* restore the entry cursor */
+    }
+    return 1;
+}
+/* r_shortv: backward test made of two alternatives (see the inline notes); returns 1 if either holds, else 0. The _b tests are assumed to step the cursor back on success. */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1; /* alternative 1: symbol before the cursor is outside g_v_WXY ... */
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1; /* ... preceded by a g_v symbol ... */
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1; /* ... preceded by a non-g_v symbol */
+        goto lab0;
+    lab1:
+        z->c = z->l - m; /* alternative 2 starts again from the entry cursor */
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0; /* non-g_v symbol before the cursor ... */
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0; /* ... preceded by a g_v symbol ... */
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+static int r_R1(struct SN_env * z) {
+    /* R1 test: 1 when the cursor is at or beyond mark p1 (kept in I[0] by r_mark_regions), else 0. */
+    return (z->c >= z->I[0]) ? 1 : 0;
+}
+
+static int r_R2(struct SN_env * z) {
+    /* R2 test: 1 when the cursor is at or beyond mark p2 (kept in I[1] by r_mark_regions), else 0. */
+    return (z->c >= z->I[1]) ? 1 : 0;
+}
+/* r_Step_1a: backward search of a_1 (defined above this excerpt) for a suffix; the switch rewrites or deletes the matched slice. Returns 0 when no suffix matches or a case's precondition fails, else 1. */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1; /* nothing precedes the suffix */
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0; /* exactly one symbol preceded the suffix: "ie" was written */
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0; /* no g_v symbol found further back: leave the suffix alone */
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+/* r_Step_1b: backward search of a_3; case 1 rewrites the suffix when in R1, case 2 deletes it when a g_v symbol precedes it and then repairs the new ending via a second search in a_2. Returns 0 when nothing applies. */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0; /* no g_v symbol before the suffix */
+                    z->c--;
+                }
+                z->c = z->l - m_test; /* it was only a test: put the cursor back */
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c; /* remember the cursor across the insertion */
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c; /* remember the cursor across the insertion */
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+/* r_Step_1c: replace a final 'y' or 'Y' by 'i' when the symbol before it is outside g_v and that symbol is not the first one of the region. Returns 1 on replacement, else 0. */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0; /* symbol before the y/Y must be outside g_v */
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0; /* cursor reached the backward limit: the negated test fails */
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+/* r_Step_2: backward search of a_4 for a suffix that must lie in R1; each case rewrites the matched slice (cases 13 and 16 first check the symbol before it). Returns 0 when nothing applies, else 1. */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0; /* only when 'l' precedes the suffix */
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0; /* only when a g_valid_LI symbol precedes the suffix */
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+/* r_Step_3: backward search of a_5 for a suffix that must lie in R1; cases 1-4 rewrite it, case 5 deletes it, case 6 deletes it only when it also lies in R2. Returns 0 when nothing applies, else 1. */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+/* r_Step_4: backward search of a_6 for a suffix that must lie in R2; case 1 deletes it, case 2 deletes it only when 's' or 't' precedes it. Returns 0 when nothing applies, else 1. */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m; /* 's' did not match: retry with 't' from the same position */
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+/* r_Step_5: backward search of a_7; case 1 deletes the suffix when it is in R2, or in R1 with r_shortv() failing before it; case 2 deletes it when it is in R2 and 'l' precedes it. Returns 0 when nothing applies, else 1. */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0; /* shortv held, so the negated test fails */
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0; /* 'l' must precede the suffix */
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+/* r_exception2: returns 1 only when an a_8 entry (table defined above this excerpt) matches backwards from the cursor and the match reaches the backward limit lb; nothing is rewritten. */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (!(find_among_b(z, a_8, 8))) return 0; /* substring, line 147 */
+    z->bra = z->c; /* ], line 147 */
+    if (z->c > z->lb) return 0; /* atlimit, line 147 */
+    return 1;
+}
+/* r_exception1: forward search of a_9 at the cursor; succeeds only if the match ends at the limit z->l. Cases 1..11 replace the matched slice; any other nonzero result returns 1 without a change. Returns 0 on no match. */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+/* r_postlude: when Y_found (B[0]) is set, replace each 'Y' found from the cursor onwards by 'y' and return 1; returns 0 straight away when Y_found is clear. */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c; /* start of this repetition */
+        while(1) { /* goto, line 192 */
+            int c = z->c; /* shadows the outer c: candidate position being tried */
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0; /* no further 'Y': stop repeating */
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+/* english_stem: entry point. Returns 1 after an r_exception1() hit or after running the full step sequence; returns 0, with no rewrite done, when fewer than 3 symbols lie between the cursor and z->l. */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0; /* whole-word exception handled: skip every other step */
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0; /* input too short for the hop: leave it unstemmed */
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m; /* each 'do' block puts the cursor back, whether or not the step succeeded */
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5; /* exception2 matched: steps 1b..5 are skipped */
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb; /* end of the backward section: cursor returns to the position saved in lb */
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+extern struct SN_env * english_create_env(void) { return SN_create_env(0, 2, 1); } /* arguments 0, 2, 1 are presumably the S / I / B slot counts: this stemmer uses I[0], I[1] and B[0] -- TODO confirm against api.h */
+
+extern void english_close_env(struct SN_env * z) { SN_close_env(z); } /* hands the environment to SN_close_env(); pair with english_create_env() */
+


diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5


--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+/* Environment lifecycle for the English stemmer: create one, and dispose of it when done. */
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+/* Stem the word held in z; english_stem.c returns 1 normally and 0 when the word has fewer than 3 symbols. */
+extern int english_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae


--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h> /* INT_MAX and INT_MIN, needed by MAXINT / MININT below */
+
+#include "api.h"
+
+#define MAXINT INT_MAX /* largest int value */
+#define MININT INT_MIN /* smallest int value */
+
+#define HEAD (2*sizeof(int)) /* bytes taken by the two ints that SIZE() and CAPACITY() read in front of p; parenthesized so it expands safely inside any expression */
+
+#define SIZE(p)        ((int *)(p))[-1] /* the int stored immediately before p */
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n)) /* store n in that same slot; n and the whole assignment are parenthesized */
+#define CAPACITY(p)    ((int *)(p))[-2] /* the int stored two slots before p */
+
+struct among /* one row of a lookup table passed to find_among() / find_among_b() */
+{   int s_size;     /* number of symbols in s */
+    symbol * s;       /* search string; its length is s_size (the tables in this patch store no terminator) */
+    int substring_i;/* index to longest matching substring; -1 in the tables when there is none */
+    int result;     /* result of the lookup; generated code switches on it (presumably it is what find_among returns) */
+    int (* function)(struct SN_env *); /* optional routine attached to the row; 0 in every table visible in this patch excerpt */
+};
+/* Symbol-string creation and release (definitions are not part of this header). */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+/* Grouping tests: s is a bitmap for the codes min..max; generated callers treat a nonzero return as a match. The _b variants are the ones used while scanning backwards. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+/* Range tests over the codes min..max, forward and backward (_b) -- presumably analogous to the grouping tests; no caller is visible in this excerpt. */
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+/* String comparison at the cursor: eq_s takes (s_size, s), eq_v takes a symbol string p; nonzero means equal in the generated callers. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+/* Table lookup: v has v_size rows of struct among; generated callers treat 0 as "no match" and switch on any other value. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+/* Editing primitives; generated callers set z->bra and z->ket before each slice_* call. */
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+/* Insertion between the explicit positions bra and ket (generated callers pass the cursor for both). */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+/* Both take and return a symbol string; their exact semantics are not visible from this header -- see the runtime source. */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+/* Debug hook; not called by the generated code in this excerpt. */
+extern void debug(struct SN_env * z, int number, int line_count);
+


diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+/* Routines of the Russian stemmer: one public entry point plus static helpers (their bodies are not part of this excerpt). */
+extern int russian_stem(struct SN_env * z);
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+/* Environment create / release, with the same signatures as english_create_env() / english_close_env(). */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+/* a_0: among table over the strings above, rows being { s_size, s, substring_i, result, function }. The byte values look like KOI8-R Cyrillic (TODO confirm); the routine that searches it is outside this excerpt. */
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+/* a_1: among table over the strings above; all 26 rows carry result 1 and no substring link (-1). The routine that searches it is outside this excerpt. */
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+/* a_2: among table over the strings above; results are 1 or 2, and substring_i links a row to a shorter row it ends with. The routine that searches it is outside this excerpt. */
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+/* a_3: two-row among table, both rows with result 1. The routine that searches it is outside this excerpt. */
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+/* a_4: 46-row among table over the strings above; results are 1 or 2, and substring_i links a row to a shorter row it ends with. The routine that searches it is outside this excerpt. */
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+/* a_5: 36-row among table over the strings above; every row carries result 1, and substring_i links a row to a shorter row it ends with. The routine that searches it is outside this excerpt. */
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+/* Suffix strings and among table searched by r_derivational() via
+ * find_among_b(z, a_6, 2).  Both entries carry result 1. */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+/* Suffix strings and among table searched by r_tidy_up() via
+ * find_among_b(z, a_7, 4).  The result value (1, 2 or 3) selects the
+ * matching case of the switch in r_tidy_up(). */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+/* Membership bitmap passed to in_grouping()/out_grouping() together with the
+ * code range 192..220: bit (ch - 192) is set when symbol ch belongs to the
+ * group (presumably the vowels, going by the name -- TODO confirm). */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+/* One-symbol literals compared with eq_s_b():
+ *   s_0, s_1 in r_perfective_gerund(); s_2, s_3 in r_adjectival();
+ *   s_4, s_5 in r_verb(); s_6, s_7, s_8 in r_tidy_up(); s_9 in russian_stem(). */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/*
+ * Compute the two region marks stored in z->I[0] (pV) and z->I[1] (p2).
+ *
+ * Both marks default to z->l.  Scanning forward from the cursor, I[0] is set
+ * just past the first symbol that is in g_v; I[1] is set after a further
+ * out-of-g_v symbol, an in-g_v symbol and another out-of-g_v symbol have been
+ * passed.  If the text ends during any scan the marks found so far are kept.
+ * The cursor is restored before returning.  Always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 100 */
+        /* each loop below advances one symbol at a time until the grouping
+         * test succeeds (the test itself consumes the matching symbol) */
+        while(1) { /* gopast, line 101 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+            break;
+        lab1:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[0] = z->c; /* setmark pV, line 101 */
+        while(1) { /* gopast, line 101 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+            break;
+        lab2:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+            break;
+        lab3:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 102 */
+            if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+            break;
+        lab4:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 102 */
+    lab0:
+        /* reached on success and on running out of text alike */
+        z->c = c;
+    }
+    return 1;
+}
+
+/* Returns 1 when the cursor is at or beyond the p2 mark (z->I[1]), else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/*
+ * Remove a suffix listed in a_0, searching backwards from the cursor.
+ *
+ * Result 1 suffixes are deleted only when the symbol before them equals s_0
+ * or s_1; result 2 suffixes are deleted unconditionally.
+ * Returns 1 when a suffix was removed, 0 when nothing matched.
+ */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 111 */
+    among_var = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 111 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 115 */
+                if (!(eq_s_b(z, 1, s_0))) goto lab1;
+                goto lab0;
+            lab1:
+                /* first alternative failed: rewind and try the second */
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_1))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 115 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 122 */
+            break;
+    }
+    return 1;
+}
+
+/* Delete a suffix listed in a_1 that ends at the cursor.
+ * Returns 0 when no entry of a_1 matches, 1 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c;                          /* slice end */
+    among_var = find_among_b(z, a_1, 26);
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* slice start */
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/*
+ * Remove an r_adjective() suffix and then, optionally, a suffix listed in a_2
+ * that precedes it.
+ *
+ * Returns 0 when r_adjective() fails.  Otherwise returns 1, whether or not
+ * the optional second removal took place; if that attempt fails the cursor is
+ * put back where r_adjective() left it.
+ */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m; goto lab0; }
+            case 1:
+                /* result 1: delete only when preceded by s_2 or s_3 */
+                {   int m = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m;
+                    /* note: this m is the inner one, which shadows the
+                     * outer "try" m */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Delete a suffix listed in a_3 that ends at the cursor.
+ * Returns 0 when no entry of a_3 matches, 1 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c;                          /* slice end */
+    among_var = find_among_b(z, a_3, 2);
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* slice start */
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/*
+ * Remove a suffix listed in a_4, searching backwards from the cursor.
+ *
+ * Result 1 suffixes are deleted only when the symbol before them equals s_4
+ * or s_5; result 2 suffixes are deleted unconditionally.
+ * Returns 1 when a suffix was removed, 0 when nothing matched.
+ */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 176 */
+    among_var = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 176 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 182 */
+                if (!(eq_s_b(z, 1, s_4))) goto lab1;
+                goto lab0;
+            lab1:
+                /* first alternative failed: rewind and try the second */
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_5))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 182 */
+            break;
+        case 2:
+            slice_del(z); /* delete, line 190 */
+            break;
+    }
+    return 1;
+}
+
+/* Delete a suffix listed in a_5 that ends at the cursor.
+ * Returns 0 when no entry of a_5 matches, 1 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c;                          /* slice end */
+    among_var = find_among_b(z, a_5, 36);
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* slice start */
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/* Delete a suffix listed in a_6, but only if r_R2() accepts the position
+ * where that suffix starts.
+ * Returns 0 when nothing matches or r_R2() fails, 1 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c;                          /* slice end */
+    among_var = find_among_b(z, a_6, 2);
+    if (among_var == 0) return 0;
+    z->bra = z->c;                          /* slice start */
+    if (r_R2(z) == 0) return 0;
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/*
+ * Final clean-up driven by the a_7 table:
+ *   result 1 - delete the matched suffix, then delete one s_6 symbol if it is
+ *              itself preceded by s_7;
+ *   result 2 - delete the matched symbol if it is preceded by s_8;
+ *   result 3 - delete the matched symbol.
+ * Returns 0 when nothing matches or a required neighbour is missing, else 1.
+ */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c;
+    among_var = find_among_b(z, a_7, 4);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+    if (among_var == 1) {
+        slice_del(z);
+        z->ket = z->c;
+        if (eq_s_b(z, 1, s_6) == 0) return 0;
+        z->bra = z->c;
+        if (eq_s_b(z, 1, s_7) == 0) return 0;
+        slice_del(z);
+    } else if (among_var == 2) {
+        if (eq_s_b(z, 1, s_8) == 0) return 0;
+        slice_del(z);
+    } else if (among_var == 3) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/*
+ * Stem the word held in z in place.
+ *
+ * Steps, as visible below:
+ *   1. r_mark_regions() computes the marks I[0] (pV) and I[1] (p2).
+ *   2. Processing switches to backward mode with the backward limit moved up
+ *      to I[0], so nothing before that mark can be matched.
+ *   3. Either r_perfective_gerund() succeeds, or r_reflexive() is tried
+ *      (optional) followed by the first of r_adjectival(), r_verb(), r_noun()
+ *      that succeeds.
+ *   4. A trailing s_9 symbol is removed if present.
+ *   5. r_derivational() and r_tidy_up() are each attempted once.
+ *
+ * Returns 0 only when the cursor lies before I[0] after step 1; otherwise 1.
+ */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        /* remember the old backward limit, install I[0] as the new one and
+         * put the cursor back */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            /* "do" semantics: the cursor is restored whether or not the
+             * suffix removal above succeeded */
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        /* reinstate the backward limit saved above */
+        z->lb = m3;
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create a stemmer environment by calling SN_create_env(0, 2, 0). */
+extern struct SN_env * russian_create_env(void)
+{
+    return SN_create_env(0, 2, 0);
+}
+
+/* Dispose of an environment by handing it to SN_close_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+


diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4


--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Environment lifecycle: create an SN_env for the Russian stemmer, and
+ * release one obtained from russian_create_env(). */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+/* Stem the word currently held in z, modifying it in place. */
+extern int russian_stem(struct SN_env * z);
+


diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524


--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+#define unless(C) if(!(C))
+
+#define CREATE_SIZE 1
+
+/*
+ * Allocate a new symbol buffer with capacity CREATE_SIZE.
+ *
+ * The allocation is HEAD bytes followed by CREATE_SIZE + 1 symbols; the
+ * returned pointer addresses the first symbol (HEAD bytes into the
+ * allocation), so it must be released with lose_s(), never with free().
+ *
+ * Returns NULL when malloc() fails.  The result of malloc() is checked before
+ * it is offset by HEAD, because offsetting and writing through a NULL result
+ * is undefined behaviour.
+ */
+extern symbol * create_s(void)
+{   symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL) return NULL;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/*
+ * Release a symbol buffer obtained from create_s() or increase_size().
+ *
+ * The pointer handed out sits HEAD bytes into the allocation, so it is moved
+ * back by HEAD before free().  A NULL argument is ignored: subtracting HEAD
+ * from NULL would hand free() an invalid pointer.
+ */
+extern void lose_s(symbol * p)
+{   if (p == NULL) return;
+    free((char *) p - HEAD);
+}
+
+/* Forward test: if the symbol at the cursor lies in [min, max] and its bit
+ * (index ch - min) is set in bitmap s, step over it and return 1; otherwise
+ * leave the cursor alone and return 0.  Returns 0 at the end of the text. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: if the symbol just before the cursor lies in [min, max] and
+ * its bit (index ch - min) is set in bitmap s, step back over it and return
+ * 1; otherwise leave the cursor alone and return 0.  Returns 0 at the
+ * backward limit z->lb. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test, complement of in_grouping(): step over the symbol at the
+ * cursor and return 1 unless it lies in [min, max] with its bit set in
+ * bitmap s, in which case return 0.  Returns 0 at the end of the text. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max) {
+        ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward test, complement of in_grouping_b(): step back over the symbol
+ * before the cursor and return 1 unless it lies in [min, max] with its bit
+ * set in bitmap s, in which case return 0.  Returns 0 at the backward limit. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max) {
+        ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward test: step over the symbol at the cursor and return 1 when it lies
+ * in [min, max]; otherwise, or at the end of the text, return 0. */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch < min || ch > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: step back over the symbol before the cursor and return 1
+ * when it lies in [min, max]; otherwise, or at the backward limit, return 0. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch < min || ch > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: step over the symbol at the cursor and return 1 when it lies
+ * outside [min, max]; otherwise, or at the end of the text, return 0. */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch >= min && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: step back over the symbol before the cursor and return 1
+ * when it lies outside [min, max]; otherwise, or at the backward limit,
+ * return 0. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch >= min && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* If the s_size symbols at s occur at the cursor, advance past them and
+ * return 1; otherwise return 0 with the cursor unchanged. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->l - z->c < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* If the s_size symbols at s occur immediately before the cursor (and not
+ * before the backward limit), move back over them and return 1; otherwise
+ * return 0 with the cursor unchanged. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    if (z->c - z->lb < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* eq_s() for a symbol buffer p whose length is taken from SIZE(p). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int n = SIZE(p);
+    return eq_s(z, n, p);
+}
+
+/* eq_s_b() for a symbol buffer p whose length is taken from SIZE(p). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int n = SIZE(p);
+    return eq_s_b(z, n, p);
+}
+
+/*
+ * Forward search of the among table v (v_size entries) for an entry whose
+ * string occurs at the cursor.
+ *
+ * Phase 1 is a binary search over v that compares the text at the cursor
+ * with each probed entry, reusing the number of symbols already known to
+ * match (common_i / common_j) so they are not compared again.
+ * Phase 2 starts from the entry the search settled on and follows the
+ * substring_i links until an entry is fully matched.
+ *
+ * On a full match the cursor is placed just past the matched string.  If the
+ * entry has no function its result is returned at once; otherwise the
+ * function is called and the result is returned only when it reports success.
+ * Returns 0 when the chain is exhausted.
+ */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        /* the loop counter below shadows the outer i on purpose; it walks the
+         * entry's string while common walks the text */
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        /* entry i is a full match when every one of its symbols matched */
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the function may have moved the cursor; put it back */
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+/*
+ * Backward search of the among table v (v_size entries) for an entry whose
+ * string ends at the cursor.  Entry strings are compared from their last
+ * symbol towards their first, against the text read backwards from the
+ * cursor and never beyond the backward limit z->lb.
+ *
+ * On a full match the cursor is placed at the start of the matched string.
+ * If the entry has no function its result is returned at once; otherwise the
+ * function is called and the result is returned only when it reports success.
+ * Returns 0 when the substring_i chain is exhausted.
+ */
+
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        /* the loop counter below shadows the outer i on purpose */
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        /* entry i is a full match when every one of its symbols matched */
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the function may have moved the cursor; put it back */
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Replace buffer p with a new one of capacity n + 20: CAPACITY(p) symbols
+ * are copied across, p is released with lose_s(), and the new buffer is
+ * returned.  The size field of the new buffer is left for the caller to set. */
+extern symbol * increase_size(symbol * p, int n)
+{
+    int capacity = n + 20;
+    char * raw = (char *) malloc(HEAD + (capacity + 1) * sizeof(symbol));
+    symbol * grown = (symbol *) (HEAD + raw);
+    CAPACITY(grown) = capacity;
+    memmove(grown, p, CAPACITY(p) * sizeof(symbol));
+    lose_s(p);
+    return grown;
+}
+
+/* Replace the symbols of z->p in [c_bra, c_ket) by the s_size symbols at s.
+   The tail of the text is shifted, the buffer grown if necessary, and
+   z->l and z->c adjusted to match.  Returns the change in text length.
+*/
+
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{
+    int old_len = SIZE(z->p);
+    int delta = s_size - (c_ket - c_bra);
+    if (delta != 0)
+    {
+        int new_len = delta + old_len;
+        if (new_len > CAPACITY(z->p))
+            z->p = increase_size(z->p, new_len);
+        /* move everything after the slice to its new position */
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        if (z->c >= c_ket)
+            z->c += delta;          /* cursor was after the slice */
+        else if (z->c > c_bra)
+            z->c = c_bra;           /* cursor was inside the slice */
+    }
+    if (s_size != 0)
+        memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/* Verify 0 <= bra <= ket <= l <= SIZE(p).  On violation, report on stderr,
+ * dump the environment with debug() and terminate the process. */
+static void slice_check(struct SN_env * z)
+{
+    if (z->bra < 0 ||
+        z->bra > z->ket ||
+        z->ket > z->l ||
+        z->l > SIZE(z->p))   /* this last test could be removed */
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+/* Replace the current slice [z->bra, z->ket) by the s_size symbols at s,
+ * after validating the slice bounds. */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the contents of symbol buffer p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int n = SIZE(p);
+    slice_from_s(z, n, p);
+}
+
+/* Delete the current slice, i.e. replace it by zero symbols. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_from_s(z, 0, 0);
+    return;
+}
+
+/* Replace [bra, ket) by the s_size symbols at s and shift the slice marks
+ * z->bra / z->ket by the length change when they lie at or after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* Replace [bra, ket) by the contents of symbol buffer p and shift the slice
+ * marks z->bra / z->ket by the length change when they lie at or after bra. */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    int delta = replace_s(z, bra, ket, SIZE(p), p);
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+}
+
+/* Copy the current slice [z->bra, z->ket) into buffer p, growing p when its
+ * capacity is too small.  Returns the (possibly reallocated) buffer. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int len;
+    slice_check(z);
+    len = z->ket - z->bra;
+    if (CAPACITY(p) < len)
+        p = increase_size(p, len);
+    memmove(p, z->p + z->bra, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into buffer p, growing p when its
+ * capacity is too small.  Returns the (possibly reallocated) buffer. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int len = z->l;
+    if (CAPACITY(p) < len)
+        p = increase_size(p, len);
+    memmove(p, z->p, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Print the text of z on stdout with position markers: '{' at lb, '[' at
+ * bra, '|' at the cursor, ']' at ket and '}' at l.  Zero symbols are shown
+ * as '#'.  The "number (line N): [size]'" prefix is printed only when
+ * number >= 0; the closing quote and newline are always printed. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{
+    int pos;
+    int size = SIZE(z->p);
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,size);
+    pos = 0;
+    while (pos <= size)
+    {
+        if (z->lb == pos) printf("{");
+        if (z->bra == pos) printf("[");
+        if (z->c == pos) printf("|");
+        if (z->ket == pos) printf("]");
+        if (z->l == pos) printf("}");
+        if (pos < size)
+        {
+            int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+        pos++;
+    }
+    printf("'\n");
+}


diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480


--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+


diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f


--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case a NUL-terminated string in place and hand the same
+ * pointer back.  Every byte is passed to tolower() as an unsigned char.
+ */
+char*
+lowerstr(char *str) {
+   char *cur;
+
+   for(cur=str; *cur; cur++)
+       *cur = tolower(*(unsigned char*)cur);
+   return str;
+}
+
+/*
+ * Release every word held by a stop list, then the pointer array
+ * itself, and zero the whole structure.
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       /*
+        * The array carries no NULL terminator, so s->len has to be
+        * tested before *ptr is read; otherwise the loop looks one slot
+        * past the last stored word.
+        */
+       while( s->len >0 && *ptr ) {
+           free(*ptr);
+           ptr++; s->len--;
+       }
+       /* the array is freed once, after the loop, not on every pass */
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Load a stop-word file into *s.
+ *
+ * "in" holds the file name; a NULL or empty value yields an empty list.
+ * The file is read line by line, empty lines are skipped, and each word
+ * is strdup'ed and, when s->wordop is set, passed through it.  On
+ * return s->stop points at a malloc'ed array of s->len words.
+ *
+ * Failures (file cannot be opened, out of memory) are raised with
+ * elog(ERROR); on the out-of-memory paths everything read so far is
+ * released first.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t  buflen=strlen(buf);
+
+           /*
+            * Drop the line terminator only when there is one: the last
+            * line of a file may lack it, and chopping blindly would eat
+            * the final letter of that word (or write to buf[-1] if the
+            * buffer starts with a NUL byte).
+            */
+           if ( buflen>0 && buf[buflen-1]=='\n' )
+               buf[buflen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   /* realloc left the old array intact: hand it over so it gets freed */
+                   s->stop=stop;
+                   freestoplist(s);
+                   fclose(hin); 
+                   elog(ERROR,"Not enough memory");
+               }
+               stop=tmp;
+           }
+    
+           stop[s->len]=strdup(buf);
+           if ( !stop[s->len] ) {
+               /* the s->len words stored so far are released with the array */
+               s->stop=stop;
+               freestoplist(s);
+               fclose(hin); 
+               elog(ERROR,"Not enough memory");
+           }
+           if ( s->wordop ) 
+               stop[s->len]=(s->wordop)(stop[s->len]);
+
+           (s->len)++; 
+       }
+       fclose(hin);
+       pfree(filename); 
+   }
+   s->stop=stop;
+} 
+
+/*
+ * qsort()/bsearch() callback for arrays of char*: both arguments point
+ * at an array element, i.e. at a char* to be compared with strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+   char *left  = *(char**)a;
+   char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/*
+ * Sort the words of a stop list with strcmp() ordering so that
+ * searchstoplist() can bsearch() them.  An empty list is left alone.
+ */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Report whether "key" is in the stop list.  When s->wordop is set the
+ * key is passed through it first (this happens even for an empty list).
+ * The lookup is a bsearch(), so the list must already be sorted.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop ) 
+       key=(*(s->wordop))(key);
+
+   if ( !s->stop || s->len<=0 )
+       return false;
+   if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
+       return true;
+   return false;
+}
+
+


diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011


--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+


diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a


--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ


diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+/* SPI plans: each one is prepared on first use and kept with SPI_saveplan() */
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+/* configuration oid cached by get_currcfg() and set by set_curcfg(); 0 means not determined yet */
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the configuration whose pg_ts_cfg row has oid "id".
+ *
+ * Two SPI queries are run: the first fetches the parser name, the
+ * second fetches, per token type of that parser, the array of
+ * dictionary names.  After SPI is closed the parser name and all
+ * dictionary names are translated to ids with name2id_prs() and
+ * name2id_dict().
+ *
+ * cfg->map and every map[].dict_id array are malloc'ed; reset_cfg()
+ * frees them.  Problems are reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       /* copy the name into TopMemoryContext: it is still needed after SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* the second plan takes (text parser name, oid cfg id) */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /*
+        * Rows arrive ordered by tokid descending, so the first row
+        * carries the largest token id and fixes the size of the map.
+        */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull here refers to the dict_name column fetched last */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       /* check the allocation before memset() writes through it */
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* for now dict_id[] holds copies of the dictionary names */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every stored dictionary name by the dictionary's id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Cache of the configurations loaded so far.
+ */
+typedef struct {
+   /* entry returned by the most recent findcfg() call */
+   TSCfgInfo   *last_cfg;
+   /* number of entries of list[] in use */
+   int     len;
+   /* number of entries list[] has room for */
+   int     reallen;
+   /* realloc'ed array, kept sorted by id (see findcfg) */
+   TSCfgInfo   *list;
+   /* configuration name -> oid lookups remembered by name2id_cfg() */
+   SNMap       name2id_map;
+} CFGList;
+
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Throw away the whole configuration cache: the name->id map, every
+ * cached configuration's dictionary map, and the list itself.  CList is
+ * zeroed afterwards.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               /* each token type owns its own dict_id array */
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort()/bsearch() callback ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly instead of subtracted: Oid is an
+ * unsigned type, so the difference of two ids converted to int has the
+ * wrong sign whenever they are more than INT_MAX apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((TSCfgInfo*)a)->id;
+   Oid idb = ((TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached configuration with the given id, loading it with
+ * init_cfg() on first use.  The cache array is kept sorted by id so
+ * that known configurations can be found with bsearch().
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   /* fast path: same configuration as the previous lookup */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* look among the configurations loaded earlier */
+   if ( CList.len != 0 ) {
+       TSCfgInfo probe;
+
+       probe.id=id;
+       CList.last_cfg = bsearch(&probe, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* unknown so far: grow the array if it is full, then load the entry */
+   if ( CList.len==CList.reallen ) {
+       int newsize = ( CList.reallen ) ? 2*CList.reallen : 16;
+       TSCfgInfo *grown = (TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*newsize);
+
+       if ( !grown ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=newsize;
+       CList.list=grown;
+   }
+   CList.last_cfg=&(CList.list[CList.len]);
+   init_cfg(id, CList.last_cfg);
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+
+   /* qsort may have moved the new entry, so look it up again */
+   return findcfg(id);
+}
+
+
+/*
+ * Translate a configuration name into the oid of its pg_ts_cfg row.
+ * Results are remembered in CList.name2id_map, so the SPI query only
+ * runs for names not seen before.  An unknown name raises an error.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid cfgid;
+
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid ) 
+       return cfgid;
+
+   SPI_connect();
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( !plan_name2id ) 
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !(SPI_processed > 0) )
+       elog(ERROR, "No tsearch config");
+
+   cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull ) 
+       elog(ERROR, "Null id for tsearch config");
+
+   SPI_finish();
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Split buf[0..buflen) into lexemes with the parser of "cfg", normalize
+ * each lexeme with the dictionaries mapped to its token type, and
+ * append the resulting words to prs->words.
+ *
+ * For every lexeme the dictionaries are tried in order; the first one
+ * returning a non-NULL array wins (an empty array means stop word).
+ * prs->pos is advanced once per recognized lexeme.  The word strings
+ * returned by the dictionary are stored in prs->words as they are; only
+ * the array holding them is freed here.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an integer, so it travels as an int32 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append a copy of the token buf[0..buflen) with the given token type
+ * to prs->words, doubling the array while it is full.  All flags of the
+ * new entry start out cleared.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) ); 
+   slot->type = (uint8)type;
+   slot->len = buflen; 
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+
+   prs->curwords++;    
+}
+
+/*
+ * Link the word added last to prs->words with every VAL item of the
+ * query whose operand equals buf[0..buflen).
+ *
+ * The first matching item is stored in the word itself.  For each
+ * further match a copy of the word flagged "repeated" is appended, so
+ * that one document word can stand for several query items.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* reserve room for the worst case: one extra entry per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc() above may have moved the
+    * array, which would leave a pointer obtained earlier dangling.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Headline variant of parsetext_v2(): every token of buf[0..buflen) is
+ * recorded verbatim in prs->words through hladdword(), and tokens whose
+ * normalized form matches an item of "query" are linked to that item by
+ * hlfinditem().
+ *
+ * The dictionaries mapped to the token type are tried in order and the
+ * first one returning a non-NULL array ends the search.  The normalized
+ * strings and the array holding them are freed here.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* the parser returns token type 0 when the input is exhausted */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       /* token types outside the map have no dictionaries */
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an integer, so it travels as an int32 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from prs->words.
+ *
+ * A word is emitted when it is flagged "in" and neither "skip" nor
+ * "repeated"; a "replace" word becomes a single blank and a "selected"
+ * word is wrapped in prs->startsel / prs->stopsel.  The word strings
+ * are freed on the way (entries flagged "repeated" are left alone).
+ * The result is a freshly palloc'ed text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int     cap=128;
+   text    *out=(text*)palloc( cap );
+   char    *dst=((char*)out) + VARHDRSZ;
+   HLWORD  *wrd;
+
+   for( wrd=prs->words; wrd - prs->words < prs->curwords; wrd++ ) {
+       /* grow until the word plus both markers is sure to fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (dst - ((char*)out)) >= cap ) {
+           int used = dst - ((char*)out);
+
+           cap*= 2;
+           out = (text *) repalloc(out, cap);
+           dst=((char*)out) + used;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace ) {
+               *dst=' ';
+               dst++;
+           } else {
+               if (wrd->selected) {
+                   memcpy(dst,prs->startsel,prs->startsellen);
+                   dst+=prs->startsellen;
+               }
+               memcpy(dst,wrd->word,wrd->len);
+               dst+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(dst,prs->stopsel,prs->stopsellen);
+                   dst+=prs->stopsellen;
+               }
+           }
+       }
+
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+   }
+
+   VARATT_SIZEP(out)=dst - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the id of the current configuration.  Once known it is served
+ * from current_cfg_id; otherwise pg_ts_cfg is searched for the row
+ * whose locale equals the server's LC_CTYPE setting.  An error is
+ * raised when no such row exists.
+ */
+int  
+get_currcfg(void) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1];
+   const char *locname;
+   bool isnull;
+   int rc;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   locname = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)locname) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( !(SPI_processed > 0) )
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current
+ * one.  findcfg() is called first, so the oid is loaded into the cache
+ * before it is recorded.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: same as set_curcfg(), with the configuration given by
+ * name.  The name is resolved with name2id_cfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text *name=PG_GETARG_TEXT_P(0);
+   Oid cfgid=name2id_cfg(name);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum( cfgid ) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* SQL-callable: return the oid of the current configuration. */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=get_currcfg();
+
+   PG_RETURN_OID( cfgid ); 
+}
+
+/*
+ * SQL-callable: report that the tsearch caches were cleaned.
+ * NOTE(review): nothing in this body resets a cache; presumably
+ * ts_error() does that as a side effect before emitting the message.
+ * Confirm against its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries to try, in order, for one token type. */
+typedef struct {
+   /* number of entries in dict_id[] */
+   int len;
+   /* dictionary oids stored as Datums */
+   Datum   *dict_id;
+} ListDictionary;
+
+/* One loaded configuration. */
+typedef struct {
+   /* oid of the pg_ts_cfg row */
+   Oid id;
+   /* id of the parser used by this configuration */
+   Oid prs_id;
+   /* number of entries in map[]; token types >= len have no dictionaries */
+   int len;
+   /* indexed by token type */
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalized word produced by parsetext_v2(). */
+typedef struct {
+        uint16          len;
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        char       *word;
+   uint32  alen;
+}       WORD;
+   
+/* Growable array of WORDs plus the running word position. */
+typedef struct {
+        WORD       *words;
+        int4            lenwords;
+        int4            curwords;
+   int4        pos;
+}       PRSTEXT;
+
+/*
+ * One token of a document being turned into a headline.  genhl() emits
+ * a token when "in" is set and neither "skip" nor "repeated" is; a
+ * "replace" token is written as a blank and a "selected" one is wrapped
+ * in the start/stop markers.  "repeated" marks an extra copy made by
+ * hlfinditem() that shares its word pointer with the entry it was
+ * copied from.
+ */
+typedef struct {
+        uint16    len;
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   uint8   type;
+        char      *word;
+   /* query item this token matches, if any */
+   ITEM      *item;
+}       HLWORD;
+   
+/* Growable array of HLWORDs plus the markers put around selected words. */
+typedef struct {
+        HLWORD       *words;
+        int4            lenwords;
+        int4            curwords;
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+/*
+ * Input function of the tsstat type: whatever is passed in, the result
+ * is an empty statistics value (header only, no entries).
+ */
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+Datum           
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty;
+
+   empty=palloc(STATHDRSIZE);
+   empty->size=0;
+   empty->len=STATHDRSIZE;
+   PG_RETURN_POINTER(empty);
+}
+
+/*
+ * Output function of the tsstat type: not implemented, always raises
+ * an error.
+ */
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * Grow an array of WordEntry pointers.  With no array yet (NULL pointer
+ * or zero length) room for 8 entries is allocated; otherwise the
+ * capacity doubles.  *len is updated to the new capacity and the
+ * possibly moved array is returned.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*)* (*len) );
+   }
+
+   *len=8;
+   return palloc( sizeof(WordEntry*)* (*len) );
+}
+
+/*
+ * Order a statistics entry against a tsvector word: shorter strings
+ * sort first, equal lengths are decided by strncmp() on the bytes.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   if ( a->len != b->len ) 
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(
+       STATSTRPTR(stat) + a->pos,
+       STRPTR(txt) + b->pos,
+       a->len
+   );
+}
+
+/*
+ * Build a new statistics value holding all entries of "stat" plus the
+ * "len" words of "txt" listed in entry[].  Each added word starts with
+ * ndoc = 1 and nentry = its number of positions (at least 1).  The
+ * result is palloc'ed; "stat" is not modified.
+ *
+ * The string area of the result is the old string area followed by the
+ * new words, so the pos values of the old entries stay valid.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* total length of the strings to be added */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary search for its slot, copy the rest in two pieces */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge the two sorted sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* whatever is left of the new words */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+/*
+ * Fold one tsvector (argument 1) into a statistics value (argument 0).
+ *
+ * Words already present in the statistics get ndoc incremented and
+ * nentry increased by their number of positions (at least 1), in place.
+ * Words not yet present are collected and, if there are any, a new
+ * statistics value containing them is built by formstat() and returned;
+ * otherwise the incoming value is returned.
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both sorted lists in parallel, or, when the
+    * statistics are at least 100 times larger than the document, binary
+    * search the statistics for each document word.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* document words sorting after the last statistics entry are all new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* a hit leaves the loop through break with StopLow < StopHigh */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/* Per-query state of the set-returning functions ts_stat / ts_accum_finish. */
+typedef struct {
+   /* index of the next entry to return */
+   uint32  cur;
+   /* private copy of the statistics (a tsstat, despite the declared type) */
+   tsvector *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions: copy "stat"
+ * into the multi-call memory context, remember it in user_fctx, and
+ * prepare slot and attinmeta for tuples of the "statinfo" relation
+ * type.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext   saved;
+   StatStorage     *storage;
+   TupleDesc       desc;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   storage=palloc( sizeof(StatStorage) );
+   storage->cur=0;
+   storage->stat=palloc( stat->len );
+   memcpy(storage->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)storage;
+
+   desc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+
+/*
+ * Produce the next result row (word, ndoc, nentry) from the stored
+ * statistics.  Once all entries have been returned the storage is freed
+ * and (Datum)0 is returned to signal the end.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *st=(StatStorage*)funcctx->user_fctx;
+   StatEntry   *entry;
+   HeapTuple   tuple;
+   Datum       result;
+   char        *values[3];
+   char        ndoc[16];
+   char        nentry[16];
+
+   if ( !(st->cur < st->stat->size) ) {
+       pfree(st->stat);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry=STATPTR(st->stat) + st->cur;
+
+   /* the word is not NUL-terminated in the statistics, so copy it out */
+   values[0]=palloc( entry->len+1 );
+   memcpy( values[0], STATSTRPTR(st->stat)+entry->pos, entry->len);
+   (values[0])[entry->len]='\0';
+
+   sprintf(ndoc,"%d",entry->ndoc);
+   values[1]=ndoc;
+   sprintf(nentry,"%d",entry->nentry);
+   values[2]=nentry;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[0]);
+   st->cur++;
+   return result;  
+}
+
+/*
+ * Set-returning function turning an accumulated statistics value
+ * (argument 0) into rows, one per word.
+ */
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat=(tsstat*)PG_GETARG_POINTER(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, stat );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   result=ts_process_call(funcctx);
+   if ( result != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* oid of the tsvector type, looked up once by get_ti_Oid() */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the oid of the tsvector type in pg_type and store it in
+ * tiOid.  An SPI connection must already be open.  Raises an error if
+ * the query fails or the type does not exist.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is a row count and never negative, so test for
+    * "no row" explicitly before vals[0] is read.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL query given in "txt" and accumulate statistics over its
+ * result.  The query must return exactly one column of type tsvector.
+ * Rows are fetched through a cursor, 100 at a time, and each non-null
+ * value is folded in with ts_accum().  An SPI connection must already
+ * be open.  Returns the accumulated statistics.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum returns its input when no new word was added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+/*
+ * Set-returning function: run the query given as text (argument 0),
+ * gather word statistics over its tsvector column, and return them as
+ * rows, one per word.
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *txt=PG_GETARG_TEXT_P(0);
+       tsstat  *stat;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* all statistics are gathered up front, inside one SPI session */
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   result=ts_process_call(funcctx);
+   if ( result != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One fixed-size record of the array that follows the tsstat header.
+ * NOTE(review): len/pos presumably give the word's length and its offset
+ * inside the string area returned by STATSTRPTR(), and ndoc/nentry
+ * presumably feed the statinfo columns of the same names -- confirm
+ * against ts_stat.c.
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;
+   uint32  nentry;
+}  StatEntry;
+
+/*
+ * Accumulated statistics value.
+ *   len  - total size of the value in bytes (STATHDRSIZE when empty)
+ *   size - number of StatEntry records stored in data[]
+ *   data - StatEntry array immediately followed by the string area
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/* Size of the fixed header (the len and size fields). */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* Total bytes needed for x entries plus lenstr bytes of string data. */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* Start of the StatEntry array inside a tsstat. */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* Start of the string area, right after the StatEntry array. */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* Number of bytes occupied by the string area. */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, plus an array giving each lexeme's position in that string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include              /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending pos.
+ *
+ * Returns 0 for equal positions: a comparator that reports "greater" for
+ * equal keys is inconsistent (a > b and b > a at once), which qsort() is
+ * not required to tolerate.  Duplicates are merged later by uniquePos().
+ */
+static int
+comparePos(const void *a, const void *b)
+{
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if (pa->pos == pb->pos)
+       return 0;
+   return (pa->pos > pb->pos) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ *
+ * When the same position occurs more than once, the surviving item keeps
+ * the largest weight seen.  Compaction stops early once MAXNUMPOS items
+ * have been kept or a kept position equals MAXENTRYPOS-1.
+ *
+ * Returns the number of items left at the front of a[].  An empty or
+ * single-item array is returned unchanged (previously l == 0 was reported
+ * as one position).
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l)
+{
+   WordEntryPos *src,
+              *dst = a;
+
+   if (l <= 1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (src = a + 1; src - a < l; src++)
+   {
+       if (src->pos != dst->pos)
+       {
+           /* new position: append it to the compacted prefix */
+           dst++;
+           dst->pos = src->pos;
+           dst->weight = src->weight;
+           if (dst - a >= MAXNUMPOS - 1 || dst->pos == MAXENTRYPOS - 1)
+               break;
+       }
+       else if (src->weight > dst->weight)
+           dst->weight = src->weight;  /* duplicate: keep the larger weight */
+   }
+   return dst + 1 - a;
+}
+
+/* Lexeme text that compareentry() indexes into; set before calling qsort(). */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN items: shorter lexemes sort first,
+ * lexemes of equal length are ordered by strncmp() over their bytes in
+ * BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *ea = (const WordEntryIN *) a;
+   const WordEntryIN *eb = (const WordEntryIN *) b;
+
+   if (ea->entry.len != eb->entry.len)
+       return (ea->entry.len > eb->entry.len) ? 1 : -1;
+
+   return strncmp(&BufferStr[ea->entry.pos],
+                  &BufferStr[eb->entry.pos],
+                  ea->entry.len);
+}
+
+/*
+ * Finish one merged entry: de-duplicate its position list (if any) and
+ * return the number of bytes the entry occupies in the tsvector data area,
+ * i.e. the SHORTALIGN'ed lexeme plus, when positions are present, the
+ * count slot and the positions themselves.
+ */
+static int4
+finalizeEntryIN(WordEntryIN * e)
+{
+   int4        need = SHORTALIGN(e->entry.len);
+
+   if (e->entry.haspos)
+   {
+       *(uint16 *) (e->pos) = uniquePos(&(e->pos[1]), *(uint16 *) (e->pos));
+       /* +1: slot 0 of the position array holds the count itself */
+       need += (*(uint16 *) (e->pos) + 1) * sizeof(WordEntryPos);
+   }
+   return need;
+}
+
+/*
+ * Sort the l parsed entries in a[], merge entries with identical lexemes
+ * (concatenating their position lists) and de-duplicate every position
+ * list.
+ *
+ *   a         - entries to process; compacted in place
+ *   l         - number of entries in a[]
+ *   buf       - buffer holding the lexeme text addressed by entry.pos
+ *   outbuflen - receives the exact number of data-area bytes needed to
+ *               store the surviving entries
+ *
+ * Returns the number of distinct entries left at the front of a[].
+ *
+ * The size now includes the count slot of every position list on all
+ * paths.  Previously the multi-entry path left it out and relied on
+ * whatever value the caller had left in *outbuflen, which could be too
+ * small for many short lexemes carrying positions.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN *ptr,
+              *res = a;
+
+   *outbuflen = 0;
+   if (l <= 0)
+       return 0;
+
+   if (l == 1)
+   {
+       *outbuflen = finalizeEntryIN(a);
+       return l;
+   }
+
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   for (ptr = a + 1; ptr - a < l; ptr++)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* different lexeme: close the current one, open the next slot */
+           *outbuflen += finalizeEntryIN(res);
+           res++;
+           if (res != ptr)     /* memcpy() must not be given overlapping areas */
+               memcpy(res, ptr, sizeof(WordEntryIN));
+       }
+       else if (ptr->entry.haspos)
+       {
+           if (res->entry.haspos)
+           {
+               /* same lexeme, both have positions: append ptr's list to res's */
+               int4        len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
+
+               res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
+               memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
+                      &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
+               *(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
+               pfree(ptr->pos);
+           }
+           else
+           {
+               /* same lexeme, only the duplicate has positions: adopt them */
+               res->entry.haspos = 1;
+               res->pos = ptr->pos;
+           }
+       }
+   }
+   *outbuflen += finalizeEntryIN(res);
+
+   return res + 1 - a;
+}
+
+/* Parser states used by gettoken_tsvector() */
+#define WAITWORD   1       /* skipping blanks before a lexeme */
+#define WAITENDWORD 2      /* inside an unquoted lexeme */
+#define WAITNEXTCHAR   3   /* previous character was a backslash */
+#define WAITENDCMPLX   4   /* inside a quoted lexeme */
+#define WAITPOSINFO    5   /* after a closing quote: ':' may follow */
+#define INPOSINFO  6       /* expecting the first digit of a position */
+#define WAITPOSDELIM   7   /* inside/after a position: digit, weight, ',' or end */
+
+/*
+ * Double state->word when the next character plus a terminating NUL would
+ * no longer fit, keeping state->curpos at the same offset.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Extract the next lexeme from state->prsbuf.
+ *
+ * On success returns 1: state->word holds the NUL-terminated lexeme,
+ * state->curpos points at its terminator and, if position info was
+ * parsed, state->alen is non-zero and state->pos holds the positions
+ * (slot 0 stores their count as a uint16).  Returns 0 when the input is
+ * exhausted.  Malformed input is reported through elog(ERROR).
+ *
+ * When state->oprisdelim is set, operator characters (ISOPERATOR) end a
+ * lexeme and ':' is not followed into position info.
+ *
+ * Characters are cast to unsigned char before being handed to the
+ * <ctype.h> functions; passing a negative plain char (any high-bit byte
+ * of an 8-bit encoding) is undefined behavior.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen = 0;
+
+   while (1)
+   {
+       char        c = *(state->prsbuf);
+
+       switch (state->state)
+       {
+           case WAITWORD:
+               if (c == '\0')
+                   return 0;
+               else if (c == '\'')
+                   state->state = WAITENDCMPLX;
+               else if (c == '\\')
+               {
+                   state->state = WAITNEXTCHAR;
+                   oldstate = WAITENDWORD;
+               }
+               else if (state->oprisdelim && ISOPERATOR(c))
+                   elog(ERROR, "Syntax error");
+               else if (c != ' ')
+               {
+                   *(state->curpos) = c;
+                   state->curpos++;
+                   state->state = WAITENDWORD;
+               }
+               break;
+
+           case WAITNEXTCHAR:
+               if (c == '\0')
+                   elog(ERROR, "There is no escaped character");
+               else
+               {
+                   /* take the escaped character literally, then resume */
+                   RESIZEPRSBUF;
+                   *(state->curpos) = c;
+                   state->curpos++;
+                   state->state = oldstate;
+               }
+               break;
+
+           case WAITENDWORD:
+               if (c == '\\')
+               {
+                   state->state = WAITNEXTCHAR;
+                   oldstate = WAITENDWORD;
+               }
+               else if (c == ' ' || c == '\0' ||
+                        (state->oprisdelim && ISOPERATOR(c)))
+               {
+                   RESIZEPRSBUF;
+                   if (state->curpos == state->word)
+                       elog(ERROR, "Syntax error");
+                   *(state->curpos) = '\0';
+                   return 1;
+               }
+               else if (c == ':')
+               {
+                   if (state->curpos == state->word)
+                       elog(ERROR, "Syntax error");
+                   *(state->curpos) = '\0';
+                   if (state->oprisdelim)
+                       return 1;
+                   else
+                       state->state = INPOSINFO;
+               }
+               else
+               {
+                   RESIZEPRSBUF;
+                   *(state->curpos) = c;
+                   state->curpos++;
+               }
+               break;
+
+           case WAITENDCMPLX:
+               if (c == '\'')
+               {
+                   RESIZEPRSBUF;
+                   *(state->curpos) = '\0';
+                   if (state->curpos == state->word)
+                       elog(ERROR, "Syntax error");
+                   if (state->oprisdelim)
+                   {
+                       /* step over the closing quote before returning */
+                       state->prsbuf++;
+                       return 1;
+                   }
+                   else
+                       state->state = WAITPOSINFO;
+               }
+               else if (c == '\\')
+               {
+                   state->state = WAITNEXTCHAR;
+                   oldstate = WAITENDCMPLX;
+               }
+               else if (c == '\0')
+                   elog(ERROR, "Syntax error");
+               else
+               {
+                   RESIZEPRSBUF;
+                   *(state->curpos) = c;
+                   state->curpos++;
+               }
+               break;
+
+           case WAITPOSINFO:
+               if (c == ':')
+                   state->state = INPOSINFO;
+               else
+                   return 1;
+               break;
+
+           case INPOSINFO:
+               if (isdigit((unsigned char) c))
+               {
+                   if (state->alen == 0)
+                   {
+                       state->alen = 4;
+                       state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
+                       *(uint16 *) (state->pos) = 0;
+                   }
+                   else if (*(uint16 *) (state->pos) + 1 >= state->alen)
+                   {
+                       state->alen *= 2;
+                       state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
+                   }
+                   (*(uint16 *) (state->pos))++;
+                   /* atoi() reads the whole number; its remaining digits are skipped in WAITPOSDELIM */
+                   state->pos[*(uint16 *) (state->pos)].pos = LIMITPOS(atoi(state->prsbuf));
+                   if (state->pos[*(uint16 *) (state->pos)].pos == 0)
+                       elog(ERROR,"Wrong position info");
+                   state->pos[*(uint16 *) (state->pos)].weight = 0;
+                   state->state = WAITPOSDELIM;
+               }
+               else
+                   elog(ERROR,"Syntax error");
+               break;
+
+           case WAITPOSDELIM:
+               {
+                   int         lc = tolower((unsigned char) c);
+
+                   if (c == ',')
+                       state->state = INPOSINFO;
+                   else if (lc == 'a' || c == '*' || lc == 'b' || lc == 'c' || lc == 'd')
+                   {
+                       WordEntryPos *last = &(state->pos[*(uint16 *) (state->pos)]);
+
+                       /* a weight may only follow a position that has none yet */
+                       if (last->weight)
+                           elog(ERROR,"Syntax error");
+                       if (lc == 'b')
+                           last->weight = 2;
+                       else if (lc == 'c')
+                           last->weight = 1;
+                       else if (lc == 'd')
+                           last->weight = 0;
+                       else
+                           last->weight = 3;   /* 'a', 'A' or '*' */
+                   }
+                   else if (isspace((unsigned char) c) || c == '\0')
+                       return 1;
+                   else if (!isdigit((unsigned char) c))
+                       elog(ERROR,"Syntax error");
+               }
+               break;
+
+           default:
+               elog(ERROR, "Inner bug :(");
+               break;
+       }
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type.
+ *
+ * Tokenizes the C-string argument with gettoken_tsvector(), collects the
+ * lexemes (and their optional position lists) in temporary arrays, lets
+ * uniqueentry() sort and merge them, and finally packs everything into a
+ * freshly palloc'ed, zero-filled tsvector.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *input = PG_GETARG_CSTRING(0);
+   TI_IN_STATE prs;
+   WordEntryIN *words;
+   WordEntry  *outarr;
+   tsvector   *result;
+   char       *strbuf,
+              *strend,
+              *dst;
+   int4        nwords = 0,
+               wordcap = 64,
+               strcap = 256,
+               total,
+               i;
+
+   prs.prsbuf = input;
+   prs.len = 32;
+   prs.word = (char *) palloc(prs.len);
+   prs.oprisdelim = false;
+
+   words = (WordEntryIN *) palloc(sizeof(WordEntryIN) * wordcap);
+   strbuf = (char *) palloc(strcap);
+   strend = strbuf;
+
+   while (gettoken_tsvector(&prs))
+   {
+       /* make room for one more entry */
+       if (nwords >= wordcap)
+       {
+           wordcap *= 2;
+           words = (WordEntryIN *) repalloc((void *) words, sizeof(WordEntryIN) * wordcap);
+       }
+
+       /* make room for the lexeme text */
+       while ((strend - strbuf) + (prs.curpos - prs.word) >= strcap)
+       {
+           int4        used = strend - strbuf;
+
+           strcap *= 2;
+           strbuf = (char *) repalloc((void *) strbuf, strcap);
+           strend = strbuf + used;
+       }
+
+       if (prs.curpos - prs.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       words[nwords].entry.len = prs.curpos - prs.word;
+
+       if (strend - strbuf > MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       words[nwords].entry.pos = strend - strbuf;
+
+       memcpy((void *) strend, (void *) prs.word, words[nwords].entry.len);
+       strend += words[nwords].entry.len;
+
+       /* alen is non-zero only when the token carried position info */
+       if (prs.alen)
+       {
+           words[nwords].entry.haspos = 1;
+           words[nwords].pos = prs.pos;
+       }
+       else
+           words[nwords].entry.haspos = 0;
+
+       nwords++;
+   }
+   pfree(prs.word);
+
+   if (nwords > 0)
+       nwords = uniqueentry(words, nwords, strbuf, &strcap);
+
+   total = CALCDATASIZE(nwords, strcap);
+   result = (tsvector *) palloc(total);
+   memset(result, 0, total);
+   result->len = total;
+   result->size = nwords;
+
+   dst = STRPTR(result);
+   outarr = ARRPTR(result);
+   for (i = 0; i < nwords; i++)
+   {
+       /* lexeme text, re-based to its offset inside the result */
+       memcpy((void *) dst, (void *) &strbuf[words[i].entry.pos], words[i].entry.len);
+       words[i].entry.pos = dst - STRPTR(result);
+       dst += SHORTALIGN(words[i].entry.len);
+
+       /* position list (count slot plus positions) follows the lexeme */
+       if (words[i].entry.haspos)
+       {
+           memcpy(dst, words[i].pos, (*(uint16 *) words[i].pos + 1) * sizeof(WordEntryPos));
+           dst += (*(uint16 *) words[i].pos + 1) * sizeof(WordEntryPos);
+           pfree(words[i].pos);
+       }
+
+       memcpy(&(outarr[i]), &(words[i].entry), sizeof(WordEntry));
+   }
+
+   pfree(strbuf);
+   pfree(words);
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * SQL length(tsvector): returns the size field of the (detoasted)
+ * argument, i.e. its number of entries.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries = vec->size;
+
+   /* read the value before the detoasted copy is released */
+   PG_FREE_IF_COPY(vec, 0);
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * tsvector_out - text output function for tsvector.
+ *
+ * Each entry is written as a single-quoted lexeme, entries being separated
+ * by one space.  A single quote inside a lexeme is preceded by a backslash.
+ * When an entry has position data, ':' and a comma-separated list of
+ * positions follow; every position may be suffixed by its weight letter
+ * (weight 3 -> 'A', 2 -> 'B', 1 -> 'C', 0 -> no letter).
+ *
+ * Returns a palloc'd, NUL-terminated string.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+       /*
+        * Estimate the output size up front: two quotes per entry, the
+        * separating spaces, the terminator, twice the lexeme length (room
+        * for escapes) and 7 bytes per position.
+        * NOTE(review): 7 presumably covers up to 5 digits, a weight letter
+        * and a separator - confirm against MAXENTRYPOS.
+        */
+       lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+       for (i = 0; i < out->size; i++) {
+               lenbuf += ptr[i].len*2 /*for escape */;
+               if ( ptr[i].haspos )
+                       lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+       }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       /* ptr walks the entry array; curin walks this entry's lexeme bytes */
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'')
+           {
+               /* grow the buffer by one byte; repalloc may move it, so rebase curout */
+               int4        pos = curout - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               /* print the position, then advance curout to the NUL sprintf wrote */
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               /* comma between positions, none after the last one */
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   /* terminate at the write position and also at the very end of the buffer */
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort comparator for WORD entries.
+ *
+ * Orders by length first, then by the word bytes, then by position, so
+ * that equal words end up adjacent and in ascending position order.
+ * Returns 0 when word and position are both equal; the previous version
+ * returned -1 in that case, which made the ordering inconsistent
+ * (cmp(a,b) and cmp(b,a) were both negative).
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int         res;
+
+   if (wa->len != wb->len)
+       return (wa->len > wb->len) ? 1 : -1;
+
+   /* lengths are equal here, so either length bounds the comparison */
+   res = strncmp(wa->word, wb->word, wb->len);
+   if (res != 0)
+       return res;
+
+   if (wa->pos.pos == wb->pos.pos)
+       return 0;
+   return (wa->pos.pos > wb->pos.pos) ? 1 : -1;
+}
+
+/*
+ * Sort the word array and fold equal words into one entry each.
+ *
+ * After the call every surviving entry owns a palloc'd uint16 array in
+ * pos.apos: element 0 is the number of stored positions, elements 1..n
+ * are the positions themselves (clamped with LIMITPOS); alen is the
+ * allocated length of that array.  The word strings of folded duplicates
+ * are pfree'd.  Returns the number of distinct words left at the front
+ * of the array.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *scan,
+              *last = a;
+   int         firstpos;
+
+   /* a lone word needs no sorting */
+   if (l != 1)
+       qsort((void *) a, l, sizeof(WORD), compareWORD);
+
+   /*
+    * Save the scalar position before installing the array, exactly as the
+    * original ordering does (pos.pos and pos.apos may share storage -
+    * TODO confirm against the WORD declaration).
+    */
+   firstpos = LIMITPOS(a->pos.pos);
+   a->alen = 2;
+   a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
+   a->pos.apos[0] = 1;
+   a->pos.apos[1] = firstpos;
+
+   for (scan = a + 1; scan - a < l; scan++)
+   {
+       if (scan->len == last->len &&
+           strncmp(scan->word, last->word, last->len) == 0)
+       {
+           /* duplicate of the current group: drop the string, keep the position */
+           pfree(scan->word);
+           if (last->pos.apos[0] < MAXNUMPOS - 1 &&
+               last->pos.apos[last->pos.apos[0]] != MAXENTRYPOS - 1)
+           {
+               if (last->pos.apos[0] + 1 >= last->alen)
+               {
+                   last->alen *= 2;
+                   last->pos.apos = (uint16 *) repalloc(last->pos.apos, sizeof(uint16) * last->alen);
+               }
+               last->pos.apos[last->pos.apos[0] + 1] = LIMITPOS(scan->pos.pos);
+               last->pos.apos[0]++;
+           }
+           continue;
+       }
+
+       /* a new word: start the next group right after the previous one */
+       last++;
+       last->len = scan->len;
+       last->word = scan->word;
+       firstpos = LIMITPOS(scan->pos.pos);
+       last->alen = 2;
+       last->pos.apos = (uint16 *) palloc(sizeof(uint16) * last->alen);
+       last->pos.apos[0] = 1;
+       last->pos.apos[1] = firstpos;
+   }
+
+   return last + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * Collapses prs->words with uniqueWORD() and packs the result into a
+ * freshly palloc'd, zero-filled tsvector: the WordEntry array first, then
+ * for each word its bytes (padded with SHORTALIGN) followed, when the word
+ * has positions, by a uint16 count and that many WordEntryPos items with
+ * weight 0.
+ *
+ * Every word string, every position array and prs->words itself are
+ * pfree'd before returning.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+   /* first pass: size of the string/position area */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   /* second pass: fill the entry array and copy the data */
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+
+       /*
+        * WordEntry.pos is a 20-bit field, so the largest offset it can
+        * hold is MAXSTRPOS-1; an offset equal to MAXSTRPOS would silently
+        * wrap to zero, hence ">=" rather than ">".
+        */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+
+           ptr->haspos=1;
+           /* apos[0] is the position count, apos[1..] the positions */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text)
+ *
+ * Runs the text through parsetext_v2() with the configuration looked up
+ * by findcfg() and returns the resulting tsvector; when no words were
+ * collected, a header-only tsvector with size 0 is returned.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *src;
+   TSCfgInfo  *cfg;
+   PRSTEXT     prs;
+   tsvector   *result;
+
+   src = PG_GETARG_TEXT_P(1);
+   cfg = findcfg(PG_GETARG_INT32(0));
+
+   /* start with room for 32 words */
+   prs.pos = 0;
+   prs.curwords = 0;
+   prs.lenwords = 32;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(src), VARSIZE(src) - VARHDRSZ);
+   PG_FREE_IF_COPY(src, 1);
+
+   if (prs.curwords == 0)
+   {
+       /* nothing was collected: build an empty value by hand */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+       PG_RETURN_POINTER(result);
+   }
+
+   /* makevalue() takes over (and frees) prs.words */
+   result = makevalue(&prs);
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg_name, text): translate the configuration name with
+ * name2id_cfg() and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       result;
+
+   /* the third argument is a dummy; only the first two are passed on for real */
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * to_tsvector(text): delegate to to_tsvector() using the configuration
+ * id returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       result;
+
+   /* the third argument is a dummy value */
+   result = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * text.  Returns the oid of the first such candidate, or InvalidOid when
+ * there is none.  The whole candidate list is pfree'd on the way.
+ */
+static Oid
+findFunc(char *fname) {
+   FuncCandidateList cand,
+               next;
+   Oid         found = InvalidOid;
+   List       *qualname = makeList1(makeString(fname));
+
+   cand = FuncnameGetCandidates(qualname, 1);
+   freeList(qualname);
+
+   for (; cand != NULL; cand = next)
+   {
+       /* remember the successor before releasing this node */
+       next = cand->next;
+       if (found == InvalidOid && cand->args[0] == TEXTOID)
+           found = cand->oid;
+       pfree(cand);
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * tsearch2(tsvector_field, text_field1, ...) - row-level BEFORE trigger for
+ * INSERT/UPDATE that rebuilds the tsvector column named by the first
+ * trigger argument from the remaining arguments.
+ *
+ * Each remaining argument is either a column name or, if no such column
+ * exists, the name of a one-argument function on text (see findFunc).
+ * Once a function has been seen, it is applied to the value of every
+ * column processed after it (funcoid is never reset inside the loop).
+ * Columns that are not text/varchar/bpchar produce a WARNING and are
+ * skipped; NULL values are skipped silently.
+ *
+ * The current configuration (get_currcfg) is used for parsing.
+ * Returns the modified tuple.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   /* validate the calling context */
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* first argument: the tsvector column to fill */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   /* word accumulator shared by all source columns; starts with 32 slots */
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * If detoasting made no copy, make txt_toasted equal to txt so
+            * that the pfree() below is skipped for it.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* release txt only when it is a separate detoasted copy */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       /* makevalue() frees prs.words; the value is freed once it is in the tuple */
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store an empty (header-only) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Layout helpers for tsvector: the two int4 header fields (len, size) are
+ * followed by "size" WordEntry items and then by the string/position area.
+ * Every macro argument is parenthesised so that callers may pass arbitrary
+ * expressions (e.g. CALCDATASIZE(n, cur - data)).
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+/* total size in bytes for x entries plus lenstr bytes of string/position data */
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+/* start of the WordEntry array */
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+/* start of the string/position area; depends on the current ->size */
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* number of bytes in the string/position area */
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* address of the uint16 position count stored right after entry e's (aligned) string */
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+/* number of positions of entry e, 0 when it has none */
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+/* first WordEntryPos of entry e (only meaningful when e has positions) */
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+typedef struct
+{
+   char       *prsbuf;
+   char       *word;
+   char       *curpos;
+   int4        len;
+   int4        state;
+   int4        alen;
+   WordEntryPos    *pos;
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>            /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector) - return a copy of the value with all position data
+ * removed: only the lexeme strings are kept and every entry gets
+ * haspos = 0.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* space needed for the lexeme strings alone, each padded with SHORTALIGN */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+
+   /* copy each lexeme and rebuild its entry relative to the new string area */
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char") - return a copy of the value in which every
+ * stored position carries the given weight.
+ *
+ * The weight letter is case-insensitive: a -> 3, b -> 2, c -> 1, d -> 0.
+ * Any other character raises ERROR "Unknown weight".
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /*
+    * tolower() is only defined for values representable as unsigned char
+    * (or EOF), so convert explicitly: a plain char may be negative.
+    */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   /* the layout does not change, so a plain byte copy is enough */
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two entries that may live in different tsvectors; ptra/ptrb
+ * are the string areas the entries' pos offsets refer to.  Shorter
+ * lexemes sort first, equal-length ones are compared with strncmp.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of entry srcptr (in src) to entry destptr (in
+ * dest), adding maxpos to each position and clamping it with LIMITPOS.
+ *
+ * The destination position list is started from scratch when destptr has
+ * no positions yet.  Copying stops once the source is exhausted, the list
+ * holds MAXNUMPOS positions, or the last stored position already is the
+ * clamp value MAXENTRYPOS-1.  destptr->haspos is set when at least one
+ * position was added.
+ *
+ * Returns the number of positions added.  The caller must have reserved
+ * room behind the destination entry's string.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos )
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight;
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1;
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector) - merge two tsvectors into one.
+ *
+ * The entry arrays of both inputs are walked in parallel in compareEntry
+ * order (this presumes both inputs are already sorted that way - TODO
+ * confirm).  Entries present in only one input are copied; entries present
+ * in both are emitted once with the position lists combined.  Positions
+ * taken from the second input are shifted by the largest position found
+ * in the first input (see add_pos).
+ *
+ * The result is allocated with in1->len + in2->len bytes and trimmed to
+ * its real size at the end.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in the first argument */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* allocate for the case that no entry is shared; size is corrected below */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   /* merge while both inputs still have entries */
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* positions of in1 are copied verbatim, count included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* positions of in2 are shifted by maxpos */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one entry, position lists combined */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count is already in place, so only the added positions take space */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* leftover entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* leftover entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Fix up the header.  STRPTR() depends on ->size, so after the entry
+    * count shrinks the string area has to be moved down to its new place.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+-- Uninstall script for tsearch2: removes the objects created by
+-- tsearch2.sql.  Everything runs inside one transaction (BEGIN ... END).
+BEGIN;
+
+-- WARNING: be careful!
+-- Because of the CASCADE clauses below, this script also drops all
+-- indexes, triggers and table columns that use the types defined
+-- in tsearch2.sql.
+
+
+-- GiST operator class
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregates
+DROP AGGREGATE stat(tsvector);
+
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- types (CASCADE also removes everything that depends on them)
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- remaining functions, dropped explicitly
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable descriptions of the token types, one string per type.
+ * Slot 0 is an empty placeholder.
+ * NOTE(review): the slots appear to be indexed by the token type numbers
+ * from deflex.h (e.g. slot 4 "Email" vs. EMAIL = 4) - keep both in step.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias names of the token types, in the same slot order as
+ * lex_descr[] above.  Slot 0 is an empty placeholder.
+ * NOTE(review): presumably indexed by the token type numbers from
+ * deflex.h (e.g. slot 22 "uint" vs. UNSIGNEDINT = 22) - keep both in step.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/* Remember: LASTNUM must always equal the highest token type number defined below! */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Interface of the flex-generated word parser (parser.l).
+ *
+ * NOTE(review): token and tokenlen are declared here without "extern", so
+ * every file including this header gets its own tentative definition -
+ * confirm this is intended.  FILE is used below, so <stdio.h> must already
+ * have been included by the including file.
+ */
+
+/* text of the token most recently returned by tsearch2_yylex() */
+char      *token;
+/* length of that token in bytes */
+int            tokenlen;
+/* scan the next token; returns its type number (see deflex.h) */
+int            tsearch2_yylex(void);
+/* begin scanning the given number of bytes of an in-memory string */
+void       start_parse_str(char *, int);
+/* begin scanning a file handle; the int limits the bytes read, 0 means no limit */
+void       start_parse_fh(FILE *, int);
+/* release the scanner buffer and any saved token copy */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: the scanner's three-argument
+ * fprintf() calls are rerouted to ts_error(ERROR, ...).
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* use the postgres allocation functions inside the generated scanner */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+/* strdup may be a macro on some platforms; replace it with pstrdup either way */
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the text of the current token */
+char *s     = NULL;  /* saved copy of a whole match (e.g. a WHOLE hyphenated word) */
+
+YY_BUFFER_STATE buf = NULL; /* current scanner buffer; set by start_parse_str()/start_parse_fh() */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the file handle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* size of the next read from the file handle, computed in YY_INPUT */
+
+/* redefine macro for read limited length */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after parsing: free the saved token copy, if any, and delete
+ * the scanner buffer so that a later start_parse_*() starts fresh.
+ */
+void end_parse() {
+   if (s) { free(s); s=NULL; } 
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+} 
+
+/*
+ * Start parsing from a string: scan "limit" bytes beginning at str.
+ * A parse that is still open is finished with end_parse() first.
+ */
+void start_parse_str(char* str, int limit) {
+   if (buf) end_parse();
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Start parsing from a file handle.  A non-zero "limit" caps the number
+ * of bytes read from fh; 0 selects unlimited reading (lrlimit = -1).
+ * A parse that is still open is finished with end_parse() first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if (buf) end_parse();
+   lrlimit = ( limit ) ? limit : -1;
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+/* Saved SPI plan that fetches one pg_ts_parser row by oid; prepared on first use. */
+static void *plan_getparser=NULL;
+/* Oid of the current parser; InvalidOid until one has been selected. */
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of parser `id` read from pg_ts_parser.
+ *
+ * *prs is zeroed first.  On success the start, nexttoken, end and headline
+ * functions are resolved with fmgr_info_cxt() in TopMemoryContext,
+ * prs->lextype receives the oid stored in prs_lextype and prs->prs_id is
+ * set to `id`.  Failure to prepare the plan, a negative SPI_execp() status
+ * or a missing row are all reported through ts_error(ERROR, ...).
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+
+       /* columns 1..5 follow the order of the select list above */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       ts_error(ERROR, "No parser with id %u", id);    /* Oid is unsigned: %u, not %d */
+   SPI_finish();
+}
+
+typedef struct { /* cache of the parsers loaded so far by findprs() */
+   WParserInfo *last_prs; /* entry found by the latest lookup, or NULL */
+   int     len; /* number of entries of list in use */
+   int     reallen; /* number of entries allocated for list */
+   WParserInfo *list; /* realloc'ed array, kept sorted by prs_id */
+   SNMap       name2id_map; /* parser name -> oid map filled by name2id_prs() */
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}}; /* starts out empty */
+
+/*
+ * Forget every cached parser: release the name map and the entry array,
+ * then put PList back into its all-zero initial state.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+   free(PList.list);       /* free(NULL) is a no-op */
+   memset(&PList,0,sizeof(PList));
+}
+
+/*
+ * qsort()/bsearch() comparator that orders WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is an
+ * unsigned type, so a difference that does not fit in an int would come
+ * back with the wrong sign and make the ordering inconsistent.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached WParserInfo of parser `id`, loading it through
+ * init_prs() the first time it is asked for.  The cache array is re-sorted
+ * after every insertion so that later lookups can use bsearch().
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo *slot;
+
+   /* same parser as last time? */
+   if ( PList.last_prs != NULL && PList.last_prs->prs_id == id )
+       return PList.last_prs;
+
+   /* one of the parsers loaded earlier? */
+   if ( PList.len != 0 ) {
+       WParserInfo probe;
+
+       probe.prs_id = id;
+       PList.last_prs = bsearch(&probe, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* not cached: make sure there is room for one more entry */
+   if ( PList.len == PList.reallen ) {
+       int newsize = ( PList.reallen != 0 ) ? 2*PList.reallen : 16;
+       WParserInfo *grown = (WParserInfo*)realloc(PList.list, sizeof(WParserInfo)*newsize);
+
+       if ( grown == NULL )
+           ts_error(ERROR,"No memory");
+       PList.reallen = newsize;
+       PList.list = grown;
+   }
+
+   /* load the parser into the first free slot */
+   slot = &(PList.list[PList.len]);
+   PList.last_prs = slot;
+   init_prs(id, slot);
+   PList.len++;
+
+   /* sorting may move the new entry, so look it up again */
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return findprs(id);
+}
+
+/* Saved SPI plan that maps a parser name to its oid; prepared on first use. */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ * Names resolved before are answered from PList.name2id_map; otherwise
+ * the catalog is queried through SPI and the answer is remembered.
+ * Problems are reported through ts_error(ERROR, ...).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum argvals[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid id;
+
+   id = findSNMap_t( &(PList.name2id_map), name );
+   if ( id != 0 )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, argvals, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed > 0 ) {
+       id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   } else {
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   }
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+typedef struct { /* per-query state of the token_type*() set-returning functions */
+   int     cur; /* index in list of the next entry to return */
+   LexDescr    *list; /* array returned by the parser's lextype function; an entry with lexid 0 ends it */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type*() functions: ask parser
+ * `prsid` for its list of lexeme types and keep it, together with the
+ * tuple machinery for relation "tokentype", in the multi-call context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo *prs;
+   TypeStorage *st;
+   TupleDesc tupdesc;
+   MemoryContext oldcontext;
+
+   /* the parser is looked up before the context switch */
+   prs = findprs(prsid);
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->cur = 0;
+   st->list = (LexDescr*)DatumGetPointer( OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) ) );
+   funcctx->user_fctx = (void*)st;
+
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker of the token_type*() functions.  Returns the next
+ * (lexid, alias, descr) row as a tuple datum, or (Datum)0 once the list is
+ * exhausted; at that point the list and the state are freed.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *st = (TypeStorage*)funcctx->user_fctx;
+   LexDescr *entry;
+   char *values[3];
+   char txtid[16];
+   HeapTuple tuple;
+   Datum result;
+
+   if ( st->list == NULL || st->list[st->cur].lexid == 0 ) {
+       /* nothing left to return */
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = &(st->list[st->cur]);
+   sprintf(txtid,"%d",entry->lexid);
+   values[0] = txtid;
+   values[1] = entry->alias;
+   values[2] = entry->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   /* the strings are no longer needed once the tuple has been built */
+   pfree(values[1]);
+   pfree(values[2]);
+   st->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: lexeme types of the parser whose oid is
+ * passed as argument 0.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) { 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: lexeme types of the parser whose name is
+ * passed as argument 0.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       Oid prsid;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( name );
+       setup_firstcall(funcctx, prsid );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: lexeme types of the current parser.  When no
+ * parser has been selected yet, the one named "default" becomes current.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+
+/*
+ * Make the parser whose oid is argument 0 the current one.  findprs() is
+ * called first, so the parser is loaded before the switch takes effect.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        Oid prsid = PG_GETARG_OID(0);
+
+        findprs(prsid);
+        current_parser_id = prsid;
+        PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Make the parser whose name is argument 0 the current one; the name is
+ * resolved with name2id_prs() and the work is delegated to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *name = PG_GETARG_TEXT_P(0);
+        Oid prsid = name2id_prs(name);
+
+        DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+typedef struct { /* one token produced by a parser */
+   int type; /* token type as returned by the parser's nexttoken function */
+   char    *lexem; /* NUL-terminated, palloc'ed copy of the token text */
+} LexemEntry;
+
+typedef struct { /* per-query state of the parse*() set-returning functions */
+   int cur; /* index of the next entry to return */
+   int len; /* allocated size while collecting, number of tokens afterwards */
+   LexemEntry  *list; /* the collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse*() functions: run parser `prsid`
+ * over `txt`, collect every token in a PrsStorage kept in the multi-call
+ * memory context, and prepare the tuple machinery for relation "tokenout".
+ *
+ * prsid is declared as Oid (it used to be int) to match findprs(), the
+ * callers and the sibling setup_firstcall().
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the payload of the text value */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* a token type of 0 marks the end of the input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* keep a NUL-terminated copy of the token text */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the number of tokens and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker of the parse*() functions.  Returns the next
+ * (type, lexem) row as a tuple datum, or (Datum)0 once every token has
+ * been returned; at that point the list and the state are freed.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage *st = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry *entry;
+   char *values[2];
+   char tid[16];
+   HeapTuple tuple;
+   Datum result;
+
+   if ( !( st->cur < st->len ) ) {
+       /* nothing left to return */
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = &(st->list[st->cur]);
+   sprintf(tid,"%d",entry->type);
+   values[0] = tid;
+   values[1] = entry->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   /* the token copy is no longer needed once the tuple has been built */
+   pfree(values[1]);
+   st->cur++;
+   return result;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokens of the text in argument 1 as produced by
+ * the parser whose oid is argument 0.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1); 
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokens of the text in argument 1 as produced by
+ * the parser whose name is argument 0.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       text *txt = PG_GETARG_TEXT_P(1); 
+       Oid prsid;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( name );
+       prs_setup_firstcall(funcctx, prsid, txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokens of the text in argument 0 as produced by
+ * the current parser.  When no parser has been selected yet, the one
+ * named "default" becomes current.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0); 
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+
+/*
+ * Build a headline for a document.
+ *   arg 0: oid of the configuration
+ *   arg 1: document text
+ *   arg 2: query
+ *   arg 3: optional text with headline options
+ * The document is parsed with hlparsetext(), the headline function of the
+ * configuration's parser marks the words to show, and genhl() renders the
+ * result that is returned.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg;
+   text *in;
+   QUERYTYPE *query;
+   text *opt;
+   WParserInfo *prsobj;
+   HLPRSTEXT prs;
+   text *out;
+
+   cfg = findcfg(PG_GETARG_OID(0));
+   in = PG_GETARG_TEXT_P(1);
+   query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   if ( PG_NARGS()>3 && PG_GETARG_POINTER(3) )
+       opt = PG_GETARG_TEXT_P(3);
+   else
+       opt = NULL;
+   prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+   /* let the parser's headline function choose what to show */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Same as headline(), but the configuration is given by name (arg 0).
+ * A missing options argument is passed on as a NULL pointer.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg = PG_GETARG_TEXT_P(0);
+   Datum optarg;
+   Datum out;
+
+   if ( PG_NARGS()>3 )
+       optarg = PG_GETARG_DATUM(3);
+   else
+       optarg = PointerGetDatum(NULL);
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       optarg
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+
+/*
+ * Same as headline(), using the configuration reported by get_currcfg().
+ * Arguments: document text, query and, optionally, the options text.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum optarg;
+   Datum out;
+
+   if ( PG_NARGS()>2 )
+       optarg = PG_GETARG_DATUM(2);
+   else
+       optarg = PointerGetDatum(NULL);
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       optarg
+   );
+   PG_RETURN_DATUM(out);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+typedef struct { /* one word parser, as loaded from pg_ts_parser by init_prs() */
+   Oid prs_id; /* oid of the pg_ts_parser row */
+   FmgrInfo start_info; /* prs_start function: begins a parse */
+   FmgrInfo getlexeme_info; /* prs_nexttoken function: returns the next token */
+   FmgrInfo end_info; /* prs_end function: finishes a parse */
+   FmgrInfo headline_info; /* prs_headline function */
+   Oid lextype; /* oid of the prs_lextype function */
+   void *prs; /* value returned by the start function, handed back to the other calls */
+} WParserInfo;
+
+void init_prs(Oid id, WParserInfo *prs); /* fill *prs from the catalog */
+WParserInfo* findprs(Oid id); /* cached lookup by oid */
+Oid name2id_prs(text *name); /* cached lookup of a parser oid by name */
+void   reset_prs(void); /* drop both caches */
+
+
+typedef struct { /* description of one lexeme type */
+   int lexid; /* numeric id; 0 terminates an array of descriptions */
+   char    *alias; /* short name */
+   char    *descr; /* human-readable description */
+} LexDescr;
+
+#endif /* __WPARSER_H__ */


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * Return a palloc'ed array describing the lexeme types 1..LASTNUM of the
+ * default parser; the entry after the last one has lexid 0 as terminator.
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *descr;
+   int id;
+
+   descr = (LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+
+   for (id = 1; id <= LASTNUM; id++) {
+       LexDescr *d = &descr[id-1];
+
+       d->lexid = id;
+       d->alias = pstrdup(tok_alias[id]);
+       d->descr = pstrdup(lex_descr[id]);
+   }
+
+   /* terminator */
+   descr[LASTNUM].lexid = 0;
+
+   PG_RETURN_POINTER(descr);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+
+/*
+ * Start the default parser on a buffer (arg 0) of the given length
+ * (arg 1).  No parser state object is created: NULL is returned.
+ */
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char *str = (char*)PG_GETARG_POINTER(0);
+   int len = PG_GETARG_INT32(1);
+
+   start_parse_str( str, len );
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+
+/*
+ * Fetch the next token from the lexer.  The token text and length are
+ * stored through the pointers in arguments 1 and 2; the token type is
+ * returned.  Argument 0 (the parser state) is not used.
+ */
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **textout = (char**)PG_GETARG_POINTER(1);
+   int *lenout = (int*)PG_GETARG_POINTER(2);
+   int type;
+
+   /* the lexer has to run before token/tokenlen are read */
+   type = tsearch2_yylex();
+
+   *textout = token;
+   *lenout = tokenlen;
+   PG_RETURN_INT32(type);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+
+/*
+ * Finish a parse started with prsd_start(): the lexer state is released
+ * with end_parse().  Argument 0 (the parser state) is not used.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   end_parse();
+
+   PG_RETURN_VOID();
+}
+
+#define LEAVETOKEN(x)  ( (x)==12 ) /* x is a token type number; NOTE(review): the numeric ids presumably match the default parser's lexeme table (deflex.h) - confirm */
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 ) /* not referenced in the part of the file reviewed here */
+#define ENDPUNCTOKEN(x)    ( (x)==12 ) /* same test as LEAVETOKEN */
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 ) /* only used here as part of NOENDTOKEN */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 ) /* token types that prsd_headline flags with replace=1 */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) ) /* token types not counted as words when measuring a headline */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) ) /* token types prsd_headline avoids as the last word of a headline */
+
+typedef struct { /* window of parsed words handed to checkcondition_HL() */
+   HLWORD  *words; /* first word of the window */
+   int len; /* number of words in the window */
+} hlCheck;
+
+/*
+ * TS_execute() callback: true when query item `val` is the item of at
+ * least one word inside the hlCheck window passed as `checkval`.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window = (hlCheck*)checkval;
+   int idx;
+
+   for (idx = 0; idx < window->len; idx++) {
+       if ( window->words[idx].item == val )
+           return true;
+   }
+   return false;
+}
+
+
+/*
+ * Find the next "cover": a span [*p, *q] of prs->words, starting no earlier
+ * than the incoming *p, whose words satisfy the query.
+ *
+ * The first pass looks, for every VAL item of the query, for its first
+ * occurrence at or after the start position and takes the furthest one as
+ * *q.  The second pass walks back from *q and takes the earliest of the
+ * nearest occurrences as *p.  The span is then checked with TS_execute();
+ * when it does not satisfy the query the search restarts one word further.
+ *
+ * Returns true with *p and *q set when a cover was found, false otherwise.
+ *
+ * The two loop headers over query->size and prs->curwords were restored:
+ * this copy of the source had lost the text between '<' and '>'.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* pass 1: right border = furthest first occurrence of any query value */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   /* nothing found beyond index 0 */
+   if ( *q==0 )
+       return false;
+
+   /* pass 2: left border = earliest of the occurrences nearest to *q */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* the span does not satisfy the query: retry one word later */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+
+/*
+ * Headline function of the default parser.
+ *   arg 0: HLPRSTEXT with the parsed document; updated in place and returned
+ *   arg 1: options text or NULL; recognised keys are MaxWords, MinWords,
+ *          ShortWord, StartSel and StopSel (compared case-insensitively)
+ *   arg 2: the query
+ *
+ * Every cover reported by hlCover() is stretched or shortened towards the
+ * MinWords..MaxWords range and the best candidate is kept.  If no cover
+ * exists, the headline is the beginning of the document.  The words of
+ * the chosen span get their in/selected/skip/replace flags set, and
+ * startsel/stopsel default to empty strings.
+ *
+ * Three conditions (marked "restored" below) had lost everything between
+ * '<' and '>' in this copy of the source and were reconstructed.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults; may be overridden through opt */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       /* restored condition */
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* a better cover was already found, so try the next one */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           /* restored loop header */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Keep this candidate when it is the first one, when it has more
+        * query words and a proper end, or when it has a proper end and
+        * the best one so far does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: show the beginning of the document */
+       curlen=0;
+       poslen=0;
+       /* restored loop header */
+       for(i=0;i<prs->curwords && curlen<min_words;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   if (!prs->startsel)
+       prs->startsel=pstrdup("");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+           dwptr++;
+       olddwpos=dwptr-dw;
+       dwptr->start=ncover;
+       while(dwptr->pos < q+1 && dwptr-dw
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+
+           dwptr++;
+       (dwptr-1)->finish=ncover;
+       len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+       ncover++; 
+   } 
+   
+   out=palloc(VARHDRSZ+len);
+   cptr=((char*)out)+VARHDRSZ;
+   dwptr=dw;
+
+   while( dwptr-dw < dlen) {
+       if ( dwptr->start ) {
+           sprintf(cptr,"{%d ",dwptr->start);
+           cptr=strchr(cptr,'\0');
+       }
+       memcpy(cptr,dwptr->w,dwptr->len);
+       cptr+=dwptr->len;
+       *cptr=' ';
+       cptr++;
+       if ( dwptr->finish ) { 
+           sprintf(cptr,"}%d ",dwptr->finish);
+           cptr=strchr(cptr,'\0');
+       }
+       dwptr++;
+   }   
+
+   VARATT_SIZEP(out) = cptr - ((char*)out);
+   
+   pfree(dw);
+   pfree(doc);
+
+   PG_FREE_IF_COPY(txt,0);
+   PG_FREE_IF_COPY(query,1);
+   PG_RETURN_POINTER(out);
+}
+
diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/rewrite.c

new file mode 100644 (file)

index 0000000..d5bc0f6
--- /dev/null
+++ b/contrib/tsearch2/rewrite.c
@@ -0,0 +1,292 @@
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev 
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+typedef struct NODE /* node of the pointer-based form of a query built by maketree() */
+{
+   struct NODE *left; /* second operand of a binary operator, NULL otherwise */
+   struct NODE *right; /* operand stored right after the operator, NULL for a value */
+   ITEM       *valnode; /* the item of the plain (array) form this node stands for */
+}  NODE;
+
+/*
+ * Build the pointer-based tree for the query whose plain (array) form
+ * starts at `in`.  The right operand of an operator is the item that
+ * follows it; the left operand of a binary operator lies in->left items
+ * further on.  '!' has a right operand only.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+   NODE       *node = (NODE *) palloc(sizeof(NODE));
+
+   node->valnode = in;
+   node->left = NULL;
+   node->right = NULL;
+
+   if (in->type != OPR)
+       return node;
+
+   node->right = maketree(in + 1);
+   if (in->val != (int4) '!')
+       node->left = maketree(in + in->left);
+   return node;
+}
+
+typedef struct /* growing output buffer used while flattening a tree */
+{
+   ITEM       *ptr; /* palloc'ed array of items */
+   int4        len; /* number of items allocated */
+   int4        cur; /* number of items written so far */
+}  PLAINTREE;
+
+/*
+ * Append the subtree rooted at `node` to the plain form in `state`, in
+ * the order operator, right operand, left operand, fixing up the `left`
+ * offset of every operator.  The node itself is freed afterwards.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+   ITEM       *src = node->valnode;
+   int4        self;
+
+   /* double the buffer when it is full */
+   if (state->cur == state->len)
+   {
+       state->len *= 2;
+       state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+   }
+
+   /* the slot is addressed by index: recursion may move state->ptr */
+   self = state->cur;
+   memcpy((void *) &(state->ptr[self]), (void *) src, sizeof(ITEM));
+   state->cur++;
+
+   if (src->type != VAL)
+   {
+       if (src->val == (int4) '!')
+       {
+           state->ptr[self].left = 1;
+           plainnode(state, node->right);
+       }
+       else
+       {
+           plainnode(state, node->right);
+           state->ptr[self].left = state->cur - self;
+           plainnode(state, node->left);
+       }
+   }
+   pfree(node);
+}
+
+/*
+ * Flatten a tree into a freshly palloc'ed plain (array) form.
+ * *len receives the number of items.  NULL is returned, with *len set to
+ * 0, when root is NULL or is neither a value nor an operator.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+   PLAINTREE   pl;
+
+   pl.cur = 0;
+   pl.len = 16;
+   pl.ptr = NULL;
+
+   if (root != NULL &&
+       (root->valnode->type == VAL || root->valnode->type == OPR))
+   {
+       pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
+       plainnode(&pl, root);
+   }
+
+   *len = pl.cur;
+   return pl.ptr;
+}
+
+/*
+ * Free a whole subtree; a NULL pointer is accepted and ignored.
+ */
+static void
+freetree(NODE * node)
+{
+   if (node == NULL)
+       return;
+
+   /* the NULL check above also covers missing children */
+   freetree(node->left);
+   freetree(node->right);
+   pfree(node);
+}
+
+/*
+ * Remove every '!' subtree from the tree.
+ * This is useful for debugging, and the resulting form is what an index
+ * search works with: there the ! operator always returns TRUE.
+ *
+ * Returns the cleaned subtree, or NULL when nothing is left of it.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+   NODE       *survivor;
+
+   if (node->valnode->type == VAL)
+       return node;
+
+   if (node->valnode->val == (int4) '!')
+   {
+       freetree(node);
+       return NULL;
+   }
+
+   if (node->valnode->val == (int4) '|')
+   {
+       /* an OR disappears as soon as one of its operands does */
+       node->left = clean_NOT_intree(node->left);
+       if (node->left != NULL)
+           node->right = clean_NOT_intree(node->right);
+
+       if (node->left == NULL || node->right == NULL)
+       {
+           freetree(node);
+           return NULL;
+       }
+       return node;
+   }
+
+   /* operator &: it is replaced by whichever operand survives */
+   node->left = clean_NOT_intree(node->left);
+   node->right = clean_NOT_intree(node->right);
+   if (node->left != NULL && node->right != NULL)
+       return node;
+
+   /* at most one operand is left; survivor is NULL when none is */
+   survivor = (node->left != NULL) ? node->left : node->right;
+   pfree(node);
+   return survivor;
+}
+
+/*
+ * Return a copy of the plain-form query at `ptr` with all '!' subtrees
+ * removed.  *len receives the number of items of the result; the result
+ * is NULL when nothing is left.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+   NODE       *tree;
+
+   tree = maketree(ptr);
+   tree = clean_NOT_intree(tree);
+   return plaintree(tree, len);
+}
+
+#define V_UNKNOWN  0 /* the subtree did not collapse to a constant */
+#define V_TRUE     1 /* the subtree is always true */
+#define V_FALSE        2 /* the subtree is always false */
+
+/*
+ * Clean the query tree from values which are always in the
+ * text (stopwords), i.e. from VALTRUE leaves, and simplify the
+ * operators above them.
+ *
+ * Returns the cleaned subtree, or NULL when the whole subtree collapsed
+ * to a constant; in that case *result is set to V_TRUE or V_FALSE.
+ * *result is left untouched when a subtree is returned.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+   char        lresult = V_UNKNOWN,
+               rresult = V_UNKNOWN;
+
+   if (node->valnode->type == VAL)
+       return node;
+   else if (node->valnode->type == VALTRUE)
+   {
+       pfree(node);
+       *result = V_TRUE;
+       return NULL;
+   }
+
+
+   if (node->valnode->val == (int4) '!')
+   {
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (!node->right)
+       {
+           *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE; /* negate the constant operand */
+           freetree(node);
+           return NULL;
+       }
+   }
+   else if (node->valnode->val == (int4) '|')
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_TRUE || rresult == V_TRUE) /* x | TRUE is TRUE */
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE && rresult == V_FALSE)
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_FALSE) /* FALSE | x is x */
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_FALSE) /* x | FALSE is x */
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   else
+   {
+       NODE       *res = node;
+
+       node->left = clean_fakeval_intree(node->left, &lresult);
+       node->right = clean_fakeval_intree(node->right, &rresult);
+       if (lresult == V_FALSE || rresult == V_FALSE) /* x & FALSE is FALSE */
+       {
+           freetree(node);
+           *result = V_FALSE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE && rresult == V_TRUE)
+       {
+           freetree(node);
+           *result = V_TRUE;
+           return NULL;
+       }
+       else if (lresult == V_TRUE) /* TRUE & x is x */
+       {
+           res = node->right;
+           pfree(node);
+       }
+       else if (rresult == V_TRUE) /* x & TRUE is x */
+       {
+           res = node->left;
+           pfree(node);
+       }
+       return res;
+   }
+   return node; /* reached for a '!' whose operand survived */
+}
+
+/*
+ * Return a copy of the plain-form query at `ptr` with the VALTRUE
+ * (stopword) leaves removed.  *len receives the number of items.  When the
+ * whole query collapses to a constant, a NOTICE is emitted and NULL is
+ * returned with *len set to 0.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+   char        verdict = V_UNKNOWN;
+   NODE       *tree;
+
+   tree = clean_fakeval_intree(maketree(ptr), &verdict);
+   if (verdict == V_UNKNOWN)
+       return plaintree(tree, len);
+
+   elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+   *len = 0;
+   return NULL;
+}
diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h

new file mode 100644 (file)

index 0000000..d47788a
--- /dev/null
+++ b/contrib/tsearch2/rewrite.h
@@ -0,0 +1,7 @@
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/*
+ * Query rewriting entry points.  Both take an ITEM array plus a pointer
+ * to an int4 length and return an ITEM array.
+ */
+ITEM      *clean_NOT_v2(ITEM * ptr, int4 *len);
+ITEM      *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif
diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c

new file mode 100644 (file)

index 0000000..fe138ad
--- /dev/null
+++ b/contrib/tsearch2/snmap.c
@@ -0,0 +1,75 @@
+/* 
+ * simple but fast map from str to Oid
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/*
+ * qsort()/bsearch() callback: orders two SNMapEntry items by strcmp()
+ * of their keys.
+ */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+   SNMapEntry *lhs = (SNMapEntry*)a;
+   SNMapEntry *rhs = (SNMapEntry*)b;
+
+   return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Append (key, value) to the map and keep the entry array sorted by key.
+ *
+ * The array starts at 16 slots and doubles whenever it is full.  The key
+ * is copied with strdup(), so the caller keeps ownership of its string.
+ * Allocation failures are reported through elog(ERROR, ...).
+ */
+void 
+addSNMap( SNMap *map, char *key, Oid value ) {
+   SNMapEntry *slot;
+
+   /* grow the array when every allocated slot is in use */
+   if (map->len>=map->reallen) {
+       SNMapEntry *grown;
+       int newlen = 16;
+
+       if ( map->reallen )
+           newlen = 2*map->reallen;
+       grown=(SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newlen);
+       if ( grown == NULL )
+           elog(ERROR, "No memory");
+       map->reallen=newlen;
+       map->list=grown;
+   }
+
+   slot = &map->list[ map->len ];
+   slot->key = strdup(key);
+   if ( slot->key == NULL )
+       elog(ERROR, "No memory");
+   slot->value=value;
+   map->len++;
+
+   /* findSNMap() uses bsearch(), so the array is re-sorted after each add */
+   if ( map->len>1 )
+       qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/*
+ * Variant of addSNMap() taking a text key: the key is converted with
+ * text2char(), added, and the temporary C string is released with pfree().
+ */
+void 
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+   char *ckey;
+
+   ckey = text2char( key );
+   addSNMap(map, ckey, value);
+   pfree(ckey);
+}
+
+/*
+ * Binary-search the map for key.  Returns the stored Oid, or 0 when the
+ * map is empty or the key is not present.
+ */
+Oid 
+findSNMap( SNMap *map, char *key ) {
+   SNMapEntry probe = {key, 0};
+   SNMapEntry *hit;
+
+   if ( !map->list || map->len==0 )
+       return 0;
+
+   hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+   if ( hit == NULL )
+       return 0;
+   return hit->value;
+}
+
+/*
+ * Variant of findSNMap() taking a text key.  The key is converted with
+ * text2char(), looked up, and the temporary C string is released with
+ * pfree().  Returns whatever findSNMap() returns (0 when not found).
+ */
+Oid  
+findSNMap_t( SNMap *map, text *key ) {
+   char *k=text2char(key);
+   Oid res;    /* same type as findSNMap()'s result; a signed int could not hold every Oid */
+
+   res= findSNMap(map, k);
+   pfree(k);
+   return res;
+}
+
+/*
+ * Release every key copy and the entry array, then zero the whole SNMap
+ * structure so it can be reused as an empty map.
+ */
+void freeSNMap( SNMap *map ) {
+   if ( map->list ) {
+       int i;
+
+       for ( i = 0; i < map->len; i++ ) {
+           if ( map->list[i].key )
+               free(map->list[i].key);
+       }
+       free( map->list );
+   }
+   memset(map,0,sizeof(SNMap));
+}
+
+
diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h

new file mode 100644 (file)

index 0000000..b485601
--- /dev/null
+++ b/contrib/tsearch2/snmap.h
@@ -0,0 +1,23 @@
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One map entry: a string key and the Oid stored for it. */
+typedef struct {
+   char    *key;
+   Oid value;
+} SNMapEntry;
+
+/* String -> Oid map backed by an array of SNMapEntry. */
+typedef struct {
+   int len;    /* number of entries in use */
+   int reallen;    /* number of entries allocated in list */
+   SNMapEntry  *list;
+} SNMap;
+
+/* The _t variants take the key as a text value instead of a C string. */
+void addSNMap( SNMap *map, char *key, Oid value );
+void addSNMap_t( SNMap *map, text *key, Oid value );
+Oid findSNMap( SNMap *map, char *key );
+Oid findSNMap_t( SNMap *map, text *key );
+void freeSNMap( SNMap *map );
+
+#endif
diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c

new file mode 100644 (file)

index 0000000..c9019ce
--- /dev/null
+++ b/contrib/tsearch2/snowball/api.c
@@ -0,0 +1,48 @@
+
+#include "header.h"
+
+/*
+ * Allocate a zero-initialised stemmer environment with a working string
+ * (z->p), S_size extra strings, I_size integers and B_size booleans.
+ *
+ * Returns the new environment, or NULL if any calloc() fails.  On failure
+ * everything allocated so far is released through SN_close_env(); this is
+ * safe because each *_size field is only set after its array was
+ * allocated, so SN_close_env() skips the arrays that do not exist.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (S_size)
+    {   z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        {   int i;
+            for (i = 0; i < S_size; i++) z->S[i] = create_s();
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {   z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {   z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    SN_close_env(z);
+    return NULL;
+}
+
+/*
+ * Release an environment created by SN_create_env(): the extra strings
+ * and their array, the integer and boolean arrays, the working string,
+ * and finally the structure itself.  Each array is only touched when its
+ * recorded size is non-zero.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    int i;
+
+    if (z->S_size != 0)
+    {
+        for (i = 0; i < z->S_size; i++)
+            lose_s(z->S[i]);
+        free(z->S);
+    }
+    if (z->I_size != 0)
+        free(z->I);
+    if (z->B_size != 0)
+        free(z->B);
+    if (z->p != NULL)
+        lose_s(z->p);
+    free(z);
+}
+
+/*
+ * Load a new word into the environment: hands the range 0..z->l and the
+ * size symbols at s to replace_s(), then resets the cursor z->c to 0.
+ * NOTE(review): presumably replace_s() swaps the current contents of z->p
+ * for s and updates z->l -- confirm against its definition.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    replace_s(z, 0, z->l, size, s);
+    z->c = 0;
+}
+
diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h

new file mode 100644 (file)

index 0000000..3e8b6e1
--- /dev/null
+++ b/contrib/tsearch2/snowball/api.h
@@ -0,0 +1,27 @@
+
+/* Character type of the strings handled by the stemmer. */
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/*
+ * Stemmer state.  p is the working string; c, l and lb are the cursor,
+ * the limit and the backward limit; bra and ket delimit the current
+ * slice.  S, I and B are arrays of extra strings, integers and booleans
+ * whose lengths are S_size, I_size and B_size.
+ */
+struct SN_env {
+    symbol * p;
+    int c; int a; int l; int lb; int bra; int ket;
+    int S_size; int I_size; int B_size;
+    symbol * * S;
+    int * I;
+    symbol * B;
+};
+
+/* Create / destroy an environment with the given array sizes. */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+/* Set the word to be processed (size symbols starting at s). */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+
diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c

new file mode 100644 (file)

index 0000000..6715c7c
--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.c
@@ -0,0 +1,894 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+/*
+ * Among table searched forwards by r_mark_regions().  Initialiser fields
+ * follow struct among: { s_size, s, substring_i, result, function }.
+ */
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] =
+{
+/*  0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+/* Suffix table searched backwards by r_Step_1a(); result selects the switch case there. */
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] =
+{
+/*  0 */ { 3, s_1_0, -1, 2, 0},
+/*  1 */ { 1, s_1_1, -1, 3, 0},
+/*  2 */ { 3, s_1_2, 1, 2, 0},
+/*  3 */ { 4, s_1_3, 1, 1, 0},
+/*  4 */ { 2, s_1_4, 1, -1, 0},
+/*  5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+/*
+ * Ending table searched backwards by the second lookup in r_Step_1b().
+ * Entry 0 is the empty string (s_size 0, no string pointer).
+ */
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] =
+{
+/*  0 */ { 0, 0, -1, 3, 0},
+/*  1 */ { 2, s_2_1, 0, 2, 0},
+/*  2 */ { 2, s_2_2, 0, 2, 0},
+/*  3 */ { 2, s_2_3, 0, 2, 0},
+/*  4 */ { 2, s_2_4, 0, 2, 0},
+/*  5 */ { 2, s_2_5, 0, 1, 0},
+/*  6 */ { 2, s_2_6, 0, 2, 0},
+/*  7 */ { 2, s_2_7, 0, 2, 0},
+/*  8 */ { 2, s_2_8, 0, 2, 0},
+/*  9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+/* Suffix table searched backwards by the first lookup in r_Step_1b(). */
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] =
+{
+/*  0 */ { 2, s_3_0, -1, 2, 0},
+/*  1 */ { 3, s_3_1, 0, 1, 0},
+/*  2 */ { 3, s_3_2, -1, 2, 0},
+/*  3 */ { 4, s_3_3, -1, 2, 0},
+/*  4 */ { 5, s_3_4, 3, 1, 0},
+/*  5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+/* Suffix table searched backwards by r_Step_2(); result selects the replacement there. */
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] =
+{
+/*  0 */ { 4, s_4_0, -1, 3, 0},
+/*  1 */ { 4, s_4_1, -1, 2, 0},
+/*  2 */ { 3, s_4_2, -1, 13, 0},
+/*  3 */ { 2, s_4_3, -1, 16, 0},
+/*  4 */ { 3, s_4_4, 3, 12, 0},
+/*  5 */ { 4, s_4_5, 4, 4, 0},
+/*  6 */ { 4, s_4_6, 3, 8, 0},
+/*  7 */ { 5, s_4_7, 3, 14, 0},
+/*  8 */ { 6, s_4_8, 3, 15, 0},
+/*  9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+/* Suffix table searched backwards by r_Step_3(). */
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] =
+{
+/*  0 */ { 5, s_5_0, -1, 4, 0},
+/*  1 */ { 5, s_5_1, -1, 6, 0},
+/*  2 */ { 5, s_5_2, -1, 3, 0},
+/*  3 */ { 5, s_5_3, -1, 4, 0},
+/*  4 */ { 4, s_5_4, -1, 4, 0},
+/*  5 */ { 6, s_5_5, -1, 1, 0},
+/*  6 */ { 7, s_5_6, 5, 2, 0},
+/*  7 */ { 3, s_5_7, -1, 5, 0},
+/*  8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+/* Suffix table searched backwards by r_Step_4(). */
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] =
+{
+/*  0 */ { 2, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0},
+/*  2 */ { 4, s_6_2, -1, 1, 0},
+/*  3 */ { 4, s_6_3, -1, 1, 0},
+/*  4 */ { 4, s_6_4, -1, 1, 0},
+/*  5 */ { 3, s_6_5, -1, 1, 0},
+/*  6 */ { 3, s_6_6, -1, 1, 0},
+/*  7 */ { 3, s_6_7, -1, 1, 0},
+/*  8 */ { 3, s_6_8, -1, 1, 0},
+/*  9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+/* Suffix table searched backwards by r_Step_5(). */
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] =
+{
+/*  0 */ { 1, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+/* Word list searched backwards by r_exception2(); every entry has result -1. */
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] =
+{
+/*  0 */ { 7, s_8_0, -1, -1, 0},
+/*  1 */ { 7, s_8_1, -1, -1, 0},
+/*  2 */ { 6, s_8_2, -1, -1, 0},
+/*  3 */ { 7, s_8_3, -1, -1, 0},
+/*  4 */ { 6, s_8_4, -1, -1, 0},
+/*  5 */ { 7, s_8_5, -1, -1, 0},
+/*  6 */ { 7, s_8_6, -1, -1, 0},
+/*  7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+/*
+ * Word list searched forwards by r_exception1().  A positive result
+ * selects the replacement in that routine's switch; -1 has no case there.
+ */
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] =
+{
+/*  0 */ { 5, s_9_0, -1, -1, 0},
+/*  1 */ { 5, s_9_1, -1, -1, 0},
+/*  2 */ { 4, s_9_2, -1, -1, 0},
+/*  3 */ { 6, s_9_3, -1, -1, 0},
+/*  4 */ { 5, s_9_4, -1, 3, 0},
+/*  5 */ { 5, s_9_5, -1, 9, 0},
+/*  6 */ { 6, s_9_6, -1, 7, 0},
+/*  7 */ { 4, s_9_7, -1, -1, 0},
+/*  8 */ { 4, s_9_8, -1, 6, 0},
+/*  9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+/*
+ * Character-group tables handed to in_grouping*()/out_grouping*() together
+ * with a min/max character code: g_v with 97..121, g_v_WXY with 89..121,
+ * g_valid_LI with 99..116.
+ * NOTE(review): presumably each is a bitmap over that code range (g_v then
+ * being the vowels) -- confirm against the grouping helpers.
+ */
+static unsigned char g_v[] = { 17, 65, 16, 1 };
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 };
+
+/*
+ * Literal strings passed to eq_s()/eq_s_b()/slice_from_s()/insert_s() by
+ * the routines below.  They are not NUL-terminated; each call site passes
+ * the length explicitly.
+ */
+static symbol s_0[] = { 'y' };
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' };
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' };
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' };
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' };
+static symbol s_48[] = { 'y' };
+
+/*
+ * Prelude: clears the Y_found flag (z->B[0]), then rewrites selected 'y'
+ * characters to 'Y' and sets the flag whenever it does so:
+ *   - a 'y' at the cursor that is followed by a g_v character;
+ *   - repeatedly, a 'y' that directly follows a g_v character.
+ * The cursor is restored after each part.  Always returns 1.
+ */
+static int r_prelude(struct SN_env * z) {
+    z->B[0] = 0; /* unset Y_found, line 24 */
+    {   int c = z->c; /* do, line 25 */
+        z->bra = z->c; /* [, line 25 */
+        if (!(eq_s(z, 1, s_0))) goto lab0;
+        z->ket = z->c; /* ], line 25 */
+        if (!(in_grouping(z, g_v, 97, 121))) goto lab0;
+        slice_from_s(z, 1, s_1); /* <-, line 25 */
+        z->B[0] = 1; /* set Y_found, line 25 */
+    lab0:
+        z->c = c;
+    }
+    {   int c = z->c; /* do, line 26 */
+        while(1) { /* repeat, line 26 */
+            int c = z->c;
+            while(1) { /* goto, line 26 */
+                int c = z->c;
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                z->bra = z->c; /* [, line 26 */
+                if (!(eq_s(z, 1, s_2))) goto lab3;
+                z->ket = z->c; /* ], line 26 */
+                z->c = c;
+                break;
+            lab3:
+                z->c = c;
+                if (z->c >= z->l) goto lab2;
+                z->c++;
+            }
+            slice_from_s(z, 1, s_3); /* <-, line 26 */
+            z->B[0] = 1; /* set Y_found, line 26 */
+            continue;
+        lab2:
+            z->c = c;
+            break;
+        }
+    lab1:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Computes the region marks p1 (z->I[0]) and p2 (z->I[1]).  Both start
+ * out as z->l.  p1 is set either right after a prefix matched from a_0,
+ * or after scanning past one g_v match followed by one non-g_v match;
+ * p2 is set after one more such pair of scans.  If a scan reaches z->l
+ * first, the marks not yet set keep the value z->l.  The cursor is
+ * restored on exit.  Always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    {   int c = z->c; /* do, line 32 */
+        {   int c = z->c; /* or, line 36 */
+            if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+            goto lab1;
+        lab2:
+            z->c = c;
+            while(1) { /* gopast, line 36 */
+                if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+                break;
+            lab3:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+            while(1) { /* gopast, line 36 */
+                if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+                break;
+            lab4:
+                if (z->c >= z->l) goto lab0;
+                z->c++;
+            }
+        }
+    lab1:
+        z->I[0] = z->c; /* setmark p1, line 37 */
+        while(1) { /* gopast, line 38 */
+            if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+            break;
+        lab5:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        while(1) { /* gopast, line 38 */
+            if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+            break;
+        lab6:
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        z->I[1] = z->c; /* setmark p2, line 38 */
+    lab0:
+        z->c = c;
+    }
+    return 1;
+}
+
+/*
+ * Backward test used by r_Step_1b() and r_Step_5().  Succeeds (returns 1)
+ * when, reading backwards from the cursor, either
+ *   - a character outside g_v_WXY, then one in g_v, then one outside g_v
+ *     are matched, or
+ *   - a character outside g_v, then one in g_v are matched and the cursor
+ *     has then reached the backward limit z->lb.
+ * Returns 0 otherwise.  The cursor is left wherever matching stopped.
+ */
+static int r_shortv(struct SN_env * z) {
+    {   int m = z->l - z->c; /* or, line 46 */
+        if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+        if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+        if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+        if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+        if (z->c > z->lb) return 0; /* atlimit, line 47 */
+    }
+lab0:
+    return 1;
+}
+
+/* Returns 1 when the cursor is at or beyond mark p1 (z->I[0]), else 0. */
+static int r_R1(struct SN_env * z) {
+    return (z->I[0] <= z->c) ? 1 : 0;
+}
+
+/* Returns 1 when the cursor is at or beyond mark p2 (z->I[1]), else 0. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/*
+ * Step 1a: matches a suffix from a_1 backwards and rewrites it.
+ *   case 1: replace the slice with "ss";
+ *   case 2: replace with "ie" when exactly one character precedes the
+ *           suffix, otherwise with "i";
+ *   case 3: step back one character, scan backwards for a g_v match and,
+ *           if one is found before z->lb, delete the slice.
+ * Returns 0 when no suffix matches or a case's precondition fails,
+ * otherwise 1 (including for results with no case, e.g. -1).
+ */
+static int r_Step_1a(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 54 */
+    among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 54 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 2, s_4); /* <-, line 55 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 57 */
+                if (z->c <= z->lb) goto lab1;
+                z->c--; /* next, line 57 */
+                if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+                slice_from_s(z, 2, s_5); /* <-, line 57 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                slice_from_s(z, 1, s_6); /* <-, line 57 */
+            }
+        lab0:
+            break;
+        case 3:
+            if (z->c <= z->lb) return 0;
+            z->c--; /* next, line 58 */
+            while(1) { /* gopast, line 58 */
+                if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+                break;
+            lab2:
+                if (z->c <= z->lb) return 0;
+                z->c--;
+            }
+            slice_del(z); /* delete, line 58 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1b: matches a suffix from a_3 backwards.
+ *   case 1: if in R1, replace the slice with "ee";
+ *   case 2: require a g_v match somewhere before the suffix, delete the
+ *           slice, then look at the new ending via a_2:
+ *             1 -> insert "e" at the cursor;
+ *             2 -> delete the last character;
+ *             3 -> if the cursor is at p1 and r_shortv() succeeds,
+ *                  insert "e" at the cursor.
+ * Returns 0 when nothing matches or a precondition fails, otherwise 1.
+ */
+static int r_Step_1b(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 64 */
+    among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 64 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            if (!r_R1(z)) return 0; /* call R1, line 66 */
+            slice_from_s(z, 2, s_7); /* <-, line 66 */
+            break;
+        case 2:
+            {   int m_test = z->l - z->c; /* test, line 69 */
+                while(1) { /* gopast, line 69 */
+                    if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+                    break;
+                lab0:
+                    if (z->c <= z->lb) return 0;
+                    z->c--;
+                }
+                z->c = z->l - m_test;
+            }
+            slice_del(z); /* delete, line 69 */
+            {   int m_test = z->l - z->c; /* test, line 70 */
+                among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+                if (!(among_var)) return 0;
+                z->c = z->l - m_test;
+            }
+            switch(among_var) {
+                case 0: return 0;
+                case 1:
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+                        z->c = c;
+                    }
+                    break;
+                case 2:
+                    z->ket = z->c; /* [, line 75 */
+                    if (z->c <= z->lb) return 0;
+                    z->c--; /* next, line 75 */
+                    z->bra = z->c; /* ], line 75 */
+                    slice_del(z); /* delete, line 75 */
+                    break;
+                case 3:
+                    if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+                    {   int m_test = z->l - z->c; /* test, line 76 */
+                        if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+                        z->c = z->l - m_test;
+                    }
+                    {   int c = z->c;
+                        insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+                        z->c = c;
+                    }
+                    break;
+            }
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 1c: replaces a final 'y' or 'Y' with 'i' when the character before
+ * it is outside g_v and is not the first character of the word (the
+ * cursor must still be above z->lb after matching it).
+ * Returns 1 when the replacement was made, 0 otherwise.
+ */
+static int r_Step_1c(struct SN_env * z) {
+    z->ket = z->c; /* [, line 83 */
+    {   int m = z->l - z->c; /* or, line 83 */
+        if (!(eq_s_b(z, 1, s_10))) goto lab1;
+        goto lab0;
+    lab1:
+        z->c = z->l - m;
+        if (!(eq_s_b(z, 1, s_11))) return 0;
+    }
+lab0:
+    z->bra = z->c; /* ], line 83 */
+    if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+    {   int m = z->l - z->c; /* not, line 84 */
+        if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+        return 0;
+    lab2:
+        z->c = z->l - m;
+    }
+    slice_from_s(z, 1, s_12); /* <-, line 85 */
+    return 1;
+}
+
+/*
+ * Step 2: matches a suffix from a_4 backwards; if the match starts in R1
+ * the slice is replaced according to the table result (cases 1-15), or
+ * deleted (case 16) when preceded by a g_valid_LI character.  Case 13
+ * additionally requires a preceding 'l'.
+ * Returns 0 when nothing matches or a precondition fails, otherwise 1.
+ */
+static int r_Step_2(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 89 */
+    among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 89 */
+    if (!r_R1(z)) return 0; /* call R1, line 89 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_13); /* <-, line 90 */
+            break;
+        case 2:
+            slice_from_s(z, 4, s_14); /* <-, line 91 */
+            break;
+        case 3:
+            slice_from_s(z, 4, s_15); /* <-, line 92 */
+            break;
+        case 4:
+            slice_from_s(z, 4, s_16); /* <-, line 93 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_17); /* <-, line 94 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_18); /* <-, line 96 */
+            break;
+        case 7:
+            slice_from_s(z, 3, s_19); /* <-, line 98 */
+            break;
+        case 8:
+            slice_from_s(z, 2, s_20); /* <-, line 100 */
+            break;
+        case 9:
+            slice_from_s(z, 3, s_21); /* <-, line 101 */
+            break;
+        case 10:
+            slice_from_s(z, 3, s_22); /* <-, line 103 */
+            break;
+        case 11:
+            slice_from_s(z, 3, s_23); /* <-, line 105 */
+            break;
+        case 12:
+            slice_from_s(z, 3, s_24); /* <-, line 107 */
+            break;
+        case 13:
+            if (!(eq_s_b(z, 1, s_25))) return 0;
+            slice_from_s(z, 2, s_26); /* <-, line 108 */
+            break;
+        case 14:
+            slice_from_s(z, 3, s_27); /* <-, line 109 */
+            break;
+        case 15:
+            slice_from_s(z, 4, s_28); /* <-, line 110 */
+            break;
+        case 16:
+            if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0;
+            slice_del(z); /* delete, line 111 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 3: matches a suffix from a_5 backwards; if the match starts in R1
+ * the slice is replaced (cases 1-4) or deleted (case 5; case 6 only when
+ * also in R2).
+ * Returns 0 when nothing matches or a precondition fails, otherwise 1.
+ */
+static int r_Step_3(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 116 */
+    among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 116 */
+    if (!r_R1(z)) return 0; /* call R1, line 116 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 4, s_29); /* <-, line 117 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_30); /* <-, line 118 */
+            break;
+        case 3:
+            slice_from_s(z, 2, s_31); /* <-, line 119 */
+            break;
+        case 4:
+            slice_from_s(z, 2, s_32); /* <-, line 121 */
+            break;
+        case 5:
+            slice_del(z); /* delete, line 123 */
+            break;
+        case 6:
+            if (!r_R2(z)) return 0; /* call R2, line 125 */
+            slice_del(z); /* delete, line 125 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 4: matches a suffix from a_6 backwards; if the match starts in R2
+ * the slice is deleted.  Case 2 ("ion") additionally requires the suffix
+ * to be preceded by 's' or 't'.
+ * Returns 0 when nothing matches or a precondition fails, otherwise 1.
+ */
+static int r_Step_4(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 130 */
+    among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 130 */
+    if (!r_R2(z)) return 0; /* call R2, line 130 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_del(z); /* delete, line 133 */
+            break;
+        case 2:
+            {   int m = z->l - z->c; /* or, line 134 */
+                if (!(eq_s_b(z, 1, s_33))) goto lab1;
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!(eq_s_b(z, 1, s_34))) return 0;
+            }
+        lab0:
+            slice_del(z); /* delete, line 134 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Step 5: matches a final 'e' or 'l' (table a_7) backwards.
+ *   case 1 ('e'): delete it when in R2, or when in R1 and r_shortv()
+ *                 fails at that position;
+ *   case 2 ('l'): delete it when in R2 and preceded by another 'l'.
+ * Returns 0 when nothing matches or a precondition fails, otherwise 1.
+ */
+static int r_Step_5(struct SN_env * z) {
+    int among_var;
+    z->ket = z->c; /* [, line 139 */
+    among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+    if (!(among_var)) return 0;
+    z->bra = z->c; /* ], line 139 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int m = z->l - z->c; /* or, line 140 */
+                if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+                goto lab0;
+            lab1:
+                z->c = z->l - m;
+                if (!r_R1(z)) return 0; /* call R1, line 140 */
+                {   int m = z->l - z->c; /* not, line 140 */
+                    if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+                    return 0;
+                lab2:
+                    z->c = z->l - m;
+                }
+            }
+        lab0:
+            slice_del(z); /* delete, line 140 */
+            break;
+        case 2:
+            if (!r_R2(z)) return 0; /* call R2, line 141 */
+            if (!(eq_s_b(z, 1, s_35))) return 0;
+            slice_del(z); /* delete, line 141 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Returns 1 when an entry of a_8 matches backwards from the cursor and
+ * the match reaches the backward limit z->lb (i.e. nothing precedes it);
+ * returns 0 otherwise.  english_stem() skips steps 1b-5 on success.
+ */
+static int r_exception2(struct SN_env * z) {
+    z->ket = z->c; /* [, line 147 */
+    if (!(find_among_b(z, a_8, 8))) return 0; /* substring, line 147 */
+    z->bra = z->c; /* ], line 147 */
+    if (z->c > z->lb) return 0; /* atlimit, line 147 */
+    return 1;
+}
+
+/*
+ * Handles whole-word exceptions: matches an entry of a_9 forwards from
+ * the cursor and requires the match to end at the limit z->l.  Entries
+ * with a positive result are replaced by the fixed stem of that case;
+ * entries with result -1 hit no case and are left unchanged.
+ * Returns 1 when the word was recognised, 0 otherwise.
+ */
+static int r_exception1(struct SN_env * z) {
+    int among_var;
+    z->bra = z->c; /* [, line 159 */
+    among_var = find_among(z, a_9, 18); /* substring, line 159 */
+    if (!(among_var)) return 0;
+    z->ket = z->c; /* ], line 159 */
+    if (z->c < z->l) return 0; /* atlimit, line 159 */
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            slice_from_s(z, 3, s_36); /* <-, line 163 */
+            break;
+        case 2:
+            slice_from_s(z, 3, s_37); /* <-, line 164 */
+            break;
+        case 3:
+            slice_from_s(z, 3, s_38); /* <-, line 165 */
+            break;
+        case 4:
+            slice_from_s(z, 3, s_39); /* <-, line 166 */
+            break;
+        case 5:
+            slice_from_s(z, 3, s_40); /* <-, line 167 */
+            break;
+        case 6:
+            slice_from_s(z, 3, s_41); /* <-, line 171 */
+            break;
+        case 7:
+            slice_from_s(z, 5, s_42); /* <-, line 172 */
+            break;
+        case 8:
+            slice_from_s(z, 4, s_43); /* <-, line 173 */
+            break;
+        case 9:
+            slice_from_s(z, 5, s_44); /* <-, line 174 */
+            break;
+        case 10:
+            slice_from_s(z, 4, s_45); /* <-, line 175 */
+            break;
+        case 11:
+            slice_from_s(z, 5, s_46); /* <-, line 176 */
+            break;
+    }
+    return 1;
+}
+
+/*
+ * Postlude: undoes the prelude's marking.  Returns 0 immediately when the
+ * Y_found flag (z->B[0]) is not set; otherwise repeatedly searches forward
+ * for 'Y', replaces each one with 'y', and returns 1 once none is left.
+ */
+static int r_postlude(struct SN_env * z) {
+    if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+    while(1) { /* repeat, line 192 */
+        int c = z->c;
+        while(1) { /* goto, line 192 */
+            int c = z->c;
+            z->bra = z->c; /* [, line 192 */
+            if (!(eq_s(z, 1, s_47))) goto lab1;
+            z->ket = z->c; /* ], line 192 */
+            z->c = c;
+            break;
+        lab1:
+            z->c = c;
+            if (z->c >= z->l) goto lab0;
+            z->c++;
+        }
+        slice_from_s(z, 1, s_48); /* <-, line 192 */
+        continue;
+    lab0:
+        z->c = c;
+        break;
+    }
+    return 1;
+}
+
+/*
+ * Entry point: stems the word currently held in z, in place.
+ *
+ * Order of work:
+ *   1. r_exception1(); if it recognises the word, nothing else is done.
+ *   2. Require at least 3 characters between the cursor and z->l;
+ *      otherwise return 0 with the word unchanged.
+ *   3. r_prelude() and r_mark_regions(), restoring the cursor after each.
+ *   4. Switch to backward mode (z->lb = z->c, z->c = z->l) and run
+ *      r_Step_1a(); then, unless r_exception2() matches, r_Step_1b(),
+ *      r_Step_1c() and r_Step_2() .. r_Step_5().  A failing step is
+ *      ignored and the cursor is reset before the next one.
+ *   5. Put the cursor back at z->lb and run r_postlude().
+ *
+ * Returns 1 in every case except the length check in step 2.
+ */
+extern int english_stem(struct SN_env * z) {
+    {   int c = z->c; /* or, line 196 */
+        if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+        goto lab0;
+    lab1:
+        z->c = c;
+        {   int c_test = z->c; /* test, line 198 */
+            {   int c = z->c + 3;
+                if (0 > c || c > z->l) return 0;
+                z->c = c; /* hop, line 198 */
+            }
+            z->c = c_test;
+        }
+        {   int c = z->c; /* do, line 199 */
+            if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+        lab2:
+            z->c = c;
+        }
+        {   int c = z->c; /* do, line 200 */
+            if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+        lab3:
+            z->c = c;
+        }
+        z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+        {   int m = z->l - z->c; /* do, line 203 */
+            if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+        lab4:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* or, line 205 */
+            if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+            goto lab5;
+        lab6:
+            z->c = z->l - m;
+            {   int m = z->l - z->c; /* do, line 207 */
+                if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+            lab7:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 208 */
+                if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+            lab8:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 210 */
+                if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+            lab9:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 211 */
+                if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+            lab10:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 212 */
+                if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+            lab11:
+                z->c = z->l - m;
+            }
+            {   int m = z->l - z->c; /* do, line 214 */
+                if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+            lab12:
+                z->c = z->l - m;
+            }
+        }
+    lab5:
+        z->c = z->lb;
+        {   int c = z->c; /* do, line 217 */
+            if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+        lab13:
+            z->c = c;
+        }
+    }
+lab0:
+    return 1;
+}
+
+/*
+ * Create an environment sized for this stemmer: no extra strings, two
+ * integers (the p1/p2 marks in I[0], I[1]) and one boolean (Y_found in B[0]).
+ */
+extern struct SN_env * english_create_env(void)
+{
+    return SN_create_env(0, 2, 1);
+}
+
+/* Release an environment obtained from english_create_env(). */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+
diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h

new file mode 100644 (file)

index 0000000..bfefcd5
--- /dev/null
+++ b/contrib/tsearch2/snowball/english_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+extern int english_stem(struct SN_env * z);
+
diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h

new file mode 100644 (file)

index 0000000..aaec3ae
--- /dev/null
+++ b/contrib/tsearch2/snowball/header.h
@@ -0,0 +1,57 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Every symbol buffer is preceded by a hidden header of two ints:
+   [-2] holds the capacity and [-1] holds the current size.  HEAD is the
+   size of that header in bytes.
+
+   All macros are fully parenthesized so that they expand safely inside
+   larger expressions (for example `x % HEAD`) and with arbitrary
+   argument expressions. */
+#define HEAD (2*sizeof(int))
+
+#define SIZE(p)        (((int *)(p))[-1])
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    (((int *)(p))[-2])
+
+/* One entry of a search table passed to find_among() / find_among_b(). */
+struct among
+{   int s_size;     /* number of symbols in s */
+    symbol * s;       /* search string */
+    int substring_i;/* index of the entry tried next when this one does not
+                       apply (longest matching substring); negative ends
+                       the chain */
+    int result;     /* value the lookup returns when this entry is chosen */
+    int (* function)(struct SN_env *); /* optional extra test; when it is 0
+                                          the entry is accepted as soon as
+                                          its string matches */
+};
+
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+extern void debug(struct SN_env * z, int number, int line_count);
+
diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c

new file mode 100644 (file)

index 0000000..14fd491
--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.c
@@ -0,0 +1,626 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int russian_stem(struct SN_env * z);
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+static symbol s_0_0[3] = { 215, 219, 201 };
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 4, s_0_1, 0, 2, 0},
+/*  2 */ { 4, s_0_2, 0, 2, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 2, s_0_4, 3, 2, 0},
+/*  5 */ { 2, s_0_5, 3, 2, 0},
+/*  6 */ { 5, s_0_6, -1, 1, 0},
+/*  7 */ { 6, s_0_7, 6, 2, 0},
+/*  8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+static symbol s_1_0[2] = { 192, 192 };
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] =
+{
+/*  0 */ { 2, s_1_0, -1, 1, 0},
+/*  1 */ { 2, s_1_1, -1, 1, 0},
+/*  2 */ { 2, s_1_2, -1, 1, 0},
+/*  3 */ { 2, s_1_3, -1, 1, 0},
+/*  4 */ { 2, s_1_4, -1, 1, 0},
+/*  5 */ { 2, s_1_5, -1, 1, 0},
+/*  6 */ { 2, s_1_6, -1, 1, 0},
+/*  7 */ { 2, s_1_7, -1, 1, 0},
+/*  8 */ { 2, s_1_8, -1, 1, 0},
+/*  9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+static symbol s_2_0[2] = { 197, 205 };
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 2, s_2_1, -1, 1, 0},
+/*  2 */ { 2, s_2_2, -1, 1, 0},
+/*  3 */ { 3, s_2_3, 2, 2, 0},
+/*  4 */ { 3, s_2_4, 2, 2, 0},
+/*  5 */ { 1, s_2_5, -1, 1, 0},
+/*  6 */ { 2, s_2_6, 5, 1, 0},
+/*  7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+static symbol s_3_0[2] = { 211, 209 };
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] =
+{
+/*  0 */ { 2, s_3_0, -1, 1, 0},
+/*  1 */ { 2, s_3_1, -1, 1, 0}
+};
+
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/*  0 */ { 1, s_4_0, -1, 2, 0},
+/*  1 */ { 2, s_4_1, 0, 2, 0},
+/*  2 */ { 2, s_4_2, -1, 1, 0},
+/*  3 */ { 3, s_4_3, 2, 2, 0},
+/*  4 */ { 3, s_4_4, 2, 2, 0},
+/*  5 */ { 2, s_4_5, -1, 1, 0},
+/*  6 */ { 3, s_4_6, 5, 2, 0},
+/*  7 */ { 3, s_4_7, -1, 1, 0},
+/*  8 */ { 3, s_4_8, -1, 2, 0},
+/*  9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/*  0 */ { 1, s_5_0, -1, 1, 0},
+/*  1 */ { 2, s_5_1, 0, 1, 0},
+/*  2 */ { 2, s_5_2, 0, 1, 0},
+/*  3 */ { 1, s_5_3, -1, 1, 0},
+/*  4 */ { 1, s_5_4, -1, 1, 0},
+/*  5 */ { 2, s_5_5, 4, 1, 0},
+/*  6 */ { 2, s_5_6, 4, 1, 0},
+/*  7 */ { 2, s_5_7, -1, 1, 0},
+/*  8 */ { 2, s_5_8, -1, 1, 0},
+/*  9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/*  0 */ { 3, s_6_0, -1, 1, 0},
+/*  1 */ { 4, s_6_1, -1, 1, 0}
+};
+
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/*  0 */ { 4, s_7_0, -1, 1, 0},
+/*  1 */ { 1, s_7_1, -1, 2, 0},
+/*  2 */ { 1, s_7_2, -1, 3, 0},
+/*  3 */ { 3, s_7_3, -1, 1, 0}
+};
+
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+
+/* Compute the two region marks used by the stemmer and store them in
+   z->I[0] (pV) and z->I[1] (p2).  Both default to z->l.  Scanning forward
+   from the cursor, I[0] is set just past the first symbol belonging to
+   g_v; I[1] is set after additionally passing one symbol outside g_v, one
+   inside and one more outside.  If the end of the text is hit first, the
+   remaining marks keep their default.  The cursor is restored before
+   returning.  Always returns 1. */
+static int r_mark_regions(struct SN_env * z) {
+    int start = z->c; /* do, line 100 */
+
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+
+    /* gopast, line 101 */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    z->I[0] = z->c; /* setmark pV, line 101 */
+
+    /* gopast, line 101 */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    /* gopast, line 102 */
+    while (!in_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    /* gopast, line 102 */
+    while (!out_grouping(z, g_v, 192, 220)) {
+        if (z->c >= z->l) goto restore;
+        z->c++;
+    }
+    z->I[1] = z->c; /* setmark p2, line 102 */
+
+restore:
+    z->c = start;
+    return 1;
+}
+
+/* Return 1 when the cursor lies at or after the mark stored in z->I[1],
+   0 otherwise. */
+static int r_R2(struct SN_env * z) {
+    return (z->I[1] <= z->c) ? 1 : 0;
+}
+
+/* Look backwards for one of the endings in table a_0 and delete it.
+   Endings with result 1 are deleted only when they are preceded by the
+   symbol s_0 or s_1; endings with result 2 are deleted unconditionally.
+   Returns 1 on success, 0 when nothing applicable was found. */
+static int r_perfective_gerund(struct SN_env * z) {
+    int kind;
+
+    z->ket = z->c; /* [, line 111 */
+    kind = find_among_b(z, a_0, 9); /* substring, line 111 */
+    if (kind == 0) return 0;
+    z->bra = z->c; /* ], line 111 */
+
+    if (kind == 1) {
+        int mark = z->l - z->c; /* or, line 115 */
+        if (!eq_s_b(z, 1, s_0)) {
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z); /* delete, line 115 */
+    } else if (kind == 2) {
+        slice_del(z); /* delete, line 122 */
+    }
+    return 1;
+}
+
+/* Look backwards for one of the endings in table a_1 and delete it.
+   Returns 1 if an ending was found, 0 otherwise. */
+static int r_adjective(struct SN_env * z) {
+    int kind;
+
+    z->ket = z->c; /* [, line 127 */
+    kind = find_among_b(z, a_1, 26); /* substring, line 127 */
+    if (kind == 0) return 0;
+    z->bra = z->c; /* ], line 127 */
+
+    if (kind == 1)
+        slice_del(z); /* delete, line 136 */
+    return 1;
+}
+
+/* Remove an adjective ending (r_adjective) and then optionally ("try") a
+   preceding ending from table a_2.  Entries of a_2 with result 1 are
+   deleted only when preceded by the symbol s_2 or s_3; entries with
+   result 2 are deleted unconditionally.
+
+   Returns 0 if r_adjective() fails, 1 otherwise (the "try" part never
+   makes the routine fail).
+
+   The marks of the outer "try" and the inner "or" are kept in distinct
+   variables.  Using the same name for both made the inner one shadow the
+   outer, so a failed "try" restored the cursor to the inner mark instead
+   of the position where the "try" began. */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m_try = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m_try; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m_try; goto lab0; }
+            case 1:
+                {   int m_or = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m_or;
+                    /* both alternatives failed: abandon the whole try */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_try; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/* Look backwards for one of the endings in table a_3 and delete it.
+   Returns 1 if an ending was found, 0 otherwise. */
+static int r_reflexive(struct SN_env * z) {
+    int kind;
+
+    z->ket = z->c; /* [, line 168 */
+    kind = find_among_b(z, a_3, 2); /* substring, line 168 */
+    if (kind == 0) return 0;
+    z->bra = z->c; /* ], line 168 */
+
+    if (kind == 1)
+        slice_del(z); /* delete, line 171 */
+    return 1;
+}
+
+/* Look backwards for one of the endings in table a_4 and delete it.
+   Endings with result 1 are deleted only when they are preceded by the
+   symbol s_4 or s_5; endings with result 2 are deleted unconditionally.
+   Returns 1 on success, 0 when nothing applicable was found. */
+static int r_verb(struct SN_env * z) {
+    int kind;
+
+    z->ket = z->c; /* [, line 176 */
+    kind = find_among_b(z, a_4, 46); /* substring, line 176 */
+    if (kind == 0) return 0;
+    z->bra = z->c; /* ], line 176 */
+
+    if (kind == 1) {
+        int mark = z->l - z->c; /* or, line 182 */
+        if (!eq_s_b(z, 1, s_4)) {
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z); /* delete, line 182 */
+    } else if (kind == 2) {
+        slice_del(z); /* delete, line 190 */
+    }
+    return 1;
+}
+
+/* Look backwards for one of the endings in table a_5 and delete it.
+   Returns 1 if an ending was found, 0 otherwise. */
+static int r_noun(struct SN_env * z) {
+    int kind;
+
+    z->ket = z->c; /* [, line 199 */
+    kind = find_among_b(z, a_5, 36); /* substring, line 199 */
+    if (kind == 0) return 0;
+    z->bra = z->c; /* ], line 199 */
+
+    if (kind == 1)
+        slice_del(z); /* delete, line 206 */
+    return 1;
+}
+
+/* Look backwards for one of the endings in table a_6 and delete it,
+   provided the start of the ending satisfies r_R2().  Returns 1 if an
+   ending was found and the R2 test passed, 0 otherwise. */
+static int r_derivational(struct SN_env * z) {
+    int kind;
+
+    z->ket = z->c; /* [, line 215 */
+    kind = find_among_b(z, a_6, 2); /* substring, line 215 */
+    if (kind == 0) return 0;
+    z->bra = z->c; /* ], line 215 */
+
+    if (!r_R2(z)) return 0; /* call R2, line 215 */
+
+    if (kind == 1)
+        slice_del(z); /* delete, line 218 */
+    return 1;
+}
+
+/* Final clean-up driven by table a_7.
+     result 1: delete the ending, then require two consecutive s_6/s_7
+               symbols before it and delete the nearer one;
+     result 2: delete the ending only if it is preceded by s_8;
+     result 3: delete the ending.
+   Returns 0 when no table entry matches or a required context symbol is
+   missing, 1 otherwise. */
+static int r_tidy_up(struct SN_env * z) {
+    int kind;
+
+    z->ket = z->c; /* [, line 223 */
+    kind = find_among_b(z, a_7, 4); /* substring, line 223 */
+    if (kind == 0) return 0;
+    z->bra = z->c; /* ], line 223 */
+
+    if (kind == 1) {
+        slice_del(z); /* delete, line 227 */
+        z->ket = z->c; /* [, line 228 */
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c; /* ], line 228 */
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z); /* delete, line 228 */
+    } else if (kind == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z); /* delete, line 231 */
+    } else if (kind == 3) {
+        slice_del(z); /* delete, line 233 */
+    }
+    return 1;
+}
+
+/* Entry point of the Russian stemmer: stems the word held in z in place.
+
+   Steps, as visible below:
+     1. r_mark_regions() computes the marks z->I[0] and z->I[1]; the
+        cursor is restored afterwards.
+     2. Processing switches to backward mode (cursor at z->l) with the
+        backward limit z->lb temporarily raised to the mark z->I[0].
+     3. Either a perfective gerund ending is removed, or an optional
+        reflexive ending followed by the first applicable of adjectival,
+        verb or noun endings.
+     4. An optional trailing s_9 symbol is deleted.
+     5. r_derivational() and r_tidy_up() are each attempted; their
+        failure is ignored.
+     6. The original backward limit is restored and the cursor is set
+        back to it.
+
+   Returns 1, or 0 if the cursor lies before the mark z->I[0] when the
+   limit is set up. */
+extern int russian_stem(struct SN_env * z) {
+    {   int c = z->c; /* do, line 240 */
+        if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+    lab0:
+        z->c = c;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+    {   int m = z->l - z->c; /* setlimit, line 241 */
+        int m3; /* saved backward limit, restored at the end of this block */
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 241 */
+        m3 = z->lb; z->lb = z->c;
+        z->c = z->l - m;
+        {   int m = z->l - z->c; /* do, line 242 */
+            {   int m = z->l - z->c; /* or, line 243 */
+                if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+                goto lab2;
+            lab3:
+                z->c = z->l - m;
+                {   int m = z->l - z->c; /* try, line 244 */
+                    if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+                lab4:
+                    ;
+                }
+                {   int m = z->l - z->c; /* or, line 245 */
+                    if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m;
+                    if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+                    goto lab5;
+                lab7:
+                    z->c = z->l - m;
+                    if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+                }
+            lab5:
+                ;
+            }
+        lab2:
+        lab1:
+            /* success and failure both end up here: the "do" always
+               restores the cursor (as an offset from the end, z->l) */
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* try, line 248 */
+            z->ket = z->c; /* [, line 248 */
+            if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+            z->bra = z->c; /* ], line 248 */
+            slice_del(z); /* delete, line 248 */
+        lab8:
+            ;
+        }
+        {   int m = z->l - z->c; /* do, line 251 */
+            if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+        lab9:
+            z->c = z->l - m;
+        }
+        {   int m = z->l - z->c; /* do, line 252 */
+            if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+        lab10:
+            z->c = z->l - m;
+        }
+        z->lb = m3;
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Allocate a stemmer environment for the Russian stemmer by calling
+   SN_create_env(0, 2, 0). */
+extern struct SN_env * russian_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 0);
+    return env;
+}
+
+/* Dispose of an environment obtained from russian_create_env() by handing
+   it to SN_close_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+
diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h

new file mode 100644 (file)

index 0000000..7dc26d4
--- /dev/null
+++ b/contrib/tsearch2/snowball/russian_stem.h
@@ -0,0 +1,8 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+extern int russian_stem(struct SN_env * z);
+
diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c

new file mode 100644 (file)

index 0000000..5dc7524
--- /dev/null
+++ b/contrib/tsearch2/snowball/utilities.c
@@ -0,0 +1,328 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+#define unless(C) if(!(C))
+
+#define CREATE_SIZE 1
+
+/* Allocate a new symbol buffer.  The allocation consists of a HEAD-byte
+   header followed by CREATE_SIZE + 1 symbols; the returned pointer
+   addresses the first symbol.  Capacity and size are both initialised to
+   CREATE_SIZE.  Release the buffer with lose_s().
+
+   NOTE(review): the result of malloc() is not checked. */
+extern symbol * create_s(void)
+{
+    char * block = (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    symbol * p = (symbol *) (block + HEAD);
+
+    CAPACITY(p) = CREATE_SIZE;
+    SET_SIZE(p, CREATE_SIZE);
+    return p;
+}
+
+/* Free a symbol buffer.  p points HEAD bytes past the start of the
+   underlying allocation, so step back over the header first. */
+extern void lose_s(symbol * p)
+{
+    char * block = (char *) p - HEAD;
+    free(block);
+}
+
+/* Forward test: if the symbol at the cursor lies in [min, max] and its
+   bit is set in the bitmap s (bit (ch - min) & 7 of byte (ch - min) >> 3),
+   advance the cursor by one and return 1.  Otherwise, or at the limit
+   z->l, return 0 and leave the cursor alone. */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: if the symbol just before the cursor lies in [min, max]
+   and its bit is set in the bitmap s, move the cursor back by one and
+   return 1.  Otherwise, or at the limit z->lb, return 0 and leave the
+   cursor alone. */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max) return 0;
+    ch -= min;
+    if (ch < 0) return 0;
+    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test, complement of in_grouping(): if the symbol at the cursor
+   is outside [min, max] or its bit is clear in the bitmap s, advance the
+   cursor by one and return 1.  If the symbol belongs to the grouping, or
+   the cursor is at the limit z->l, return 0 and leave the cursor alone. */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max)
+    {
+        ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/* Backward test, complement of in_grouping_b(): if the symbol just before
+   the cursor is outside [min, max] or its bit is clear in the bitmap s,
+   move the cursor back by one and return 1.  If the symbol belongs to the
+   grouping, or the cursor is at the limit z->lb, return 0 and leave the
+   cursor alone. */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max)
+    {
+        ch -= min;
+        if (ch >= 0 && (s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/* Forward test: if the symbol at the cursor lies in [min, max], advance
+   the cursor by one and return 1; otherwise, or at the limit z->l,
+   return 0. */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch < min || max < ch) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: if the symbol just before the cursor lies in
+   [min, max], move the cursor back by one and return 1; otherwise, or at
+   the limit z->lb, return 0. */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch < min || max < ch) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward test: if the symbol at the cursor lies outside [min, max],
+   advance the cursor by one and return 1; otherwise, or at the limit
+   z->l, return 0. */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (min <= ch && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/* Backward test: if the symbol just before the cursor lies outside
+   [min, max], move the cursor back by one and return 1; otherwise, or at
+   the limit z->lb, return 0. */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (min <= ch && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/* Forward match: if the s_size symbols at s occur at the cursor (and fit
+   before the limit z->l), advance the cursor past them and return 1;
+   otherwise return 0 and leave the cursor alone. */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    int room = z->l - z->c;
+
+    if (room < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward match: if the s_size symbols at s occur immediately before the
+   cursor (and fit after the limit z->lb), move the cursor back over them
+   and return 1; otherwise return 0 and leave the cursor alone. */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    int room = z->c - z->lb;
+
+    if (room < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward match against the whole symbol buffer p; its length is taken
+   from SIZE(p).  See eq_s(). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s(z, len, p);
+}
+
+/* Backward match against the whole symbol buffer p; its length is taken
+   from SIZE(p).  See eq_s_b(). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    return eq_s_b(z, len, p);
+}
+
+/* Forward table lookup: find an entry of v[0 .. v_size-1] whose string
+   occurs at the cursor.
+
+   The first loop is a binary search over v, comparing the text at the
+   cursor with each probed entry symbol by symbol (presumably v is sorted
+   by its strings -- the search relies on an ordering that is not checked
+   here).  The second loop starts from the entry the search ended on and
+   follows the substring_i links until it reaches an entry whose string
+   was matched completely and whose function, if any, returns non-zero.
+
+   On success the cursor is placed just after the matched string and the
+   entry's result is returned.  Returns 0 when the chain of candidates is
+   exhausted. */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    struct among * w;
+
+    /* number of leading symbols known to match at bounds i and j */
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        /* note: this inner i shadows the search bound i declared above */
+        {   int i; for (i = common; i < w->s_size; i++)
+            {   if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the callback may have moved the cursor; put it back */
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/* Backward table lookup: find an entry of v[0 .. v_size-1] whose string
+   occurs immediately before the cursor.
+
+   Strings are compared from their last symbol towards their first while
+   walking the text backwards from the cursor; the comparison stops at
+   the backward limit z->lb.  The first loop is a binary search over v
+   (presumably v is sorted accordingly -- not checked here); the second
+   loop follows the substring_i links until it reaches an entry whose
+   string was matched completely and whose function, if any, returns
+   non-zero.
+
+   On success the cursor is placed at the start of the matched string and
+   the entry's result is returned.  Returns 0 when the chain of
+   candidates is exhausted. */
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    struct among * w;
+
+    /* number of trailing symbols known to match at bounds i and j */
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1)
+    {   int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        /* note: this inner i shadows the search bound i declared above */
+        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+            {   if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1)
+        {   if (i > 0) break;
+            if (j == i) break;
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1)
+    {   w = v + i;
+        if (common_i >= w->s_size)
+        {   z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {   int res = w->function(z);
+                /* the callback may have moved the cursor; put it back */
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Replace the symbol buffer p by a larger one with capacity n + 20.
+
+   The contents of p are copied into the new buffer, p is released with
+   lose_s() and the new buffer is returned; the caller must use the
+   returned pointer from then on.
+
+   The SIZE of the old buffer is carried over, so the result is a fully
+   initialised buffer even if the caller does not call SET_SIZE()
+   afterwards.  The copy is bounded by the new capacity, so a request
+   smaller than the current capacity cannot overrun the new allocation.
+
+   NOTE(review): the result of malloc() is not checked. */
+extern symbol * increase_size(symbol * p, int n)
+{   int new_size = n + 20;
+    int old_capacity = CAPACITY(p);
+    int to_copy = old_capacity < new_size ? old_capacity : new_size;
+    symbol * q = (symbol *) (HEAD + (char *) malloc(HEAD + (new_size + 1) * sizeof(symbol)));
+    CAPACITY(q) = new_size;
+    SET_SIZE(q, SIZE(p));
+    memmove(q, p, to_copy * sizeof(symbol));
+    lose_s(p);
+    return q;
+}
+
+/* to replace symbols between c_bra and c_ket in z->p by the
+   s_size symbols at s
+*/
+
+/* Replace the symbols z->p[c_bra .. c_ket-1] by the s_size symbols at s.
+
+   When the length changes, the buffer is enlarged if necessary, the tail
+   is shifted, and SIZE(z->p) and z->l are adjusted.  The cursor is
+   shifted along with the tail if it was at or after c_ket, and pulled
+   back to c_bra if it was inside the replaced slice.
+
+   Returns the change in length (new length minus old length). */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{
+    int old_len = SIZE(z->p);
+    int delta = s_size - (c_ket - c_bra);
+
+    if (delta != 0)
+    {
+        int new_len = delta + old_len;
+
+        if (new_len > CAPACITY(z->p))
+            z->p = increase_size(z->p, new_len);
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+
+        if (z->c >= c_ket)
+            z->c += delta;
+        else if (z->c > c_bra)
+            z->c = c_bra;
+    }
+    if (s_size != 0)
+        memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    return delta;
+}
+
+/* Verify that the current slice is well formed, i.e. that
+   0 <= bra <= ket <= l <= SIZE(p).  If it is not, report the fault on
+   stderr, dump the environment with debug() and terminate the process
+   with exit(1). */
+static void slice_check(struct SN_env * z)
+{
+    if (z->bra < 0 ||
+        z->bra > z->ket ||
+        z->ket > z->l ||
+        z->l > SIZE(z->p))   /* this last test could be removed */
+    {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+        exit(1);
+    }
+}
+
+/* Replace the current slice [z->bra, z->ket) by the s_size symbols at s,
+   after validating the slice with slice_check(). */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    int from = 0;
+    int to = 0;
+
+    slice_check(z);
+    from = z->bra;
+    to = z->ket;
+    replace_s(z, from, to, s_size, s);
+}
+
+/* Replace the current slice by the whole contents of the symbol buffer
+   p; its length is taken from SIZE(p). */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int len = SIZE(p);
+    slice_from_s(z, len, p);
+}
+
+/* Delete the current slice [z->bra, z->ket): validate it with
+   slice_check(), then replace it by zero symbols. */
+extern void slice_del(struct SN_env * z)
+{
+    slice_check(z);
+    replace_s(z, z->bra, z->ket, 0, 0);
+}
+
+/* Replace z->p[bra .. ket-1] by the s_size symbols at s and shift the
+   slice marks z->bra and z->ket by the change in length when they lie at
+   or after bra. */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int delta = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra)
+        z->bra += delta;
+    if (z->ket >= bra)
+        z->ket += delta;
+}
+
+/* Same as insert_s(), but the inserted text is the whole symbol buffer
+   p; its length is taken from SIZE(p). */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice [z->bra, z->ket) into the symbol buffer p,
+   enlarging p if its capacity is too small, and set SIZE(p) to the slice
+   length.  Returns the buffer, which may differ from the p passed in. */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int len;
+
+    slice_check(z);
+    len = z->ket - z->bra;
+    if (len > CAPACITY(p))
+        p = increase_size(p, len);
+    memmove(p, z->p + z->bra, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Copy the first z->l symbols of z->p into the symbol buffer p,
+   enlarging p if its capacity is too small, and set SIZE(p) to z->l.
+   Returns the buffer, which may differ from the p passed in. */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int len = z->l;
+
+    if (len > CAPACITY(p))
+        p = increase_size(p, len);
+    memmove(p, z->p, len * sizeof(symbol));
+    SET_SIZE(p, len);
+    return p;
+}
+
+/* Dump the environment to stdout on a single line.
+
+   When number is non-negative, the line starts with number, line_count
+   and SIZE(z->p).  The SIZE(z->p) symbols of z->p follow, with markers
+   printed in front of the position they refer to: '{' for z->lb, '[' for
+   z->bra, '|' for the cursor z->c, ']' for z->ket and '}' for z->l.
+   Symbols with value 0 are shown as '#'. */
+extern void debug(struct SN_env * z, int number, int line_count)
+{
+    int limit = SIZE(z->p);
+    int pos;
+
+    if (number >= 0)
+        printf("%3d (line %4d): [%d]'", number, line_count,limit);
+
+    /* pos runs up to and including limit so that markers sitting at the
+       very end of the buffer are still shown */
+    for (pos = 0; pos <= limit; pos++)
+    {
+        if (pos == z->lb) printf("{");
+        if (pos == z->bra) printf("[");
+        if (pos == z->c) printf("|");
+        if (pos == z->ket) printf("]");
+        if (pos == z->l) printf("}");
+        if (pos < limit)
+        {
+            int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+    }
+    printf("'\n");
+}
diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql

new file mode 100644 (file)

index 0000000..6ca6480
--- /dev/null
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -0,0 +1,243 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345 [email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 [email protected] qwe-wer asdf qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 
+ wow  < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies '); 
+select to_tsquery('simple', 'qwe & sKies '); 
+select to_tsquery('default', '\'the wether\':dc & \'           sKies \':BC ');
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('granite&sea'));
+ 
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E.  J.  Pratt  (1882 1964)
+', to_tsquery('sea'));
+
diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

new file mode 100644 (file)

index 0000000..7f7806f
--- /dev/null
+++ b/contrib/tsearch2/stopword.c
@@ -0,0 +1,101 @@
+/* 
+ * stopword library
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * Lower-case a NUL-terminated string in place with tolower() and return
+ * the pointer that was passed in.
+ */
+char*
+lowerstr(char *str) {
+   char *cur;
+
+   for(cur=str; *cur != '\0'; cur++)
+       *cur = tolower((unsigned char) *cur);
+   return str;
+}
+
+/*
+ * Release every word held by the stop list, then the pointer array itself,
+ * and finally zero the whole StopList structure.
+ *
+ * The array built by readstoplist() is not NULL-terminated, so the element
+ * count s->len is the only valid loop bound.  The array is freed exactly
+ * once, after all of its elements, and also when the list is empty.
+ */
+void
+freestoplist(StopList *s) {
+   char **ptr=s->stop;
+   if ( ptr ) {
+       while( s->len >0 ) {
+           free(*ptr);     /* free(NULL) is a harmless no-op */
+           ptr++; s->len--;
+       }
+       free(s->stop);
+   }
+   memset(s,0,sizeof(StopList));
+}
+
+/*
+ * Load a stop-word list from the file named by the text value "in".
+ *
+ * Each non-empty line becomes one malloc'ed entry; if s->wordop is set it
+ * is applied to the entry before it is stored.  On return s->stop points to
+ * the array (NULL when "in" is NULL or empty) and s->len holds the number of
+ * entries.  The array is not sorted and not NULL-terminated.
+ *
+ * Reports an error through elog(ERROR) when the file cannot be opened or
+ * memory runs out; in the latter case everything read so far is released
+ * first.
+ */
+void
+readstoplist(text *in, StopList *s) {
+   char **stop=NULL;
+   s->len=0;
+   if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+       char *filename=text2char(in);
+       FILE    *hin=NULL;
+       char    buf[STOPBUFLEN];
+       int reallen=0;
+       bool    nomem=false;
+
+       if ( (hin=fopen(filename,"r")) == NULL )
+           elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+       while( fgets(buf,STOPBUFLEN,hin) ) {
+           size_t  buflen=strlen(buf);
+           char    *word;
+
+           /*
+            * Strip the line terminator only when it is really there: the
+            * last line of a file may lack one, and its final character
+            * must not be chopped off.
+            */
+           if ( buflen>0 && buf[buflen-1]=='\n' )
+               buf[buflen-1] = '\0';
+           if ( *buf=='\0' ) continue;
+
+           if ( s->len>= reallen ) {
+               char **tmp;
+               reallen=(reallen) ? reallen*2 : 16;
+               tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+               if (!tmp) {
+                   nomem=true;     /* old array in "stop" is still valid */
+                   break;
+               }
+               stop=tmp;
+           }
+
+           word=strdup(buf);
+           if ( !word ) {
+               nomem=true;
+               break;
+           }
+           stop[s->len]=( s->wordop ) ? (s->wordop)(word) : word;
+
+           (s->len)++;
+       }
+       fclose(hin);
+
+       if ( nomem ) {
+           /* release what was collected before reporting the failure */
+           while( s->len > 0 )
+               free( stop[ --(s->len) ] );
+           free(stop);
+           s->stop=NULL;
+           elog(ERROR,"Not enough memory");
+       }
+       pfree(filename);
+   }
+   s->stop=stop;
+}
+
+/* qsort/bsearch comparator: both arguments point to a char* element. */
+static int
+comparestr(const void *a, const void *b) {
+   char *left  = *(char**)a;
+   char *right = *(char**)b;
+
+   return strcmp( left, right );
+}
+
+/* Sort the stop words with strcmp() ordering; an empty list is left alone. */
+void
+sortstoplist(StopList *s) {
+   if ( !s->stop || s->len<=0 )
+       return;
+   qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * Return true when key is found in the stop list by binary search (the
+ * list must already be in comparestr order).  If s->wordop is set it is
+ * applied to key first, even when the list turns out to be empty.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+   if ( s->wordop ) 
+       key=(*(s->wordop))(key);
+   if ( !s->stop || s->len<=0 )
+       return false;
+   if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
+       return true;
+   return false;
+}
+
+
diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop

new file mode 100644 (file)

index 0000000..a913011
--- /dev/null
+++ b/contrib/tsearch2/stopword/english.stop
@@ -0,0 +1,128 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+
diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop

new file mode 100644 (file)

index 0000000..1877e3a
--- /dev/null
+++ b/contrib/tsearch2/stopword/russian.stop
@@ -0,0 +1,151 @@
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ
diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c

new file mode 100644 (file)

index 0000000..7c9f20c
--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/* 
+ * interface functions to tscfg 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * Fill *cfg with the tsearch configuration stored in the pg_ts_cfg row
+ * whose oid is id: the parser id and, per token type, the list of
+ * dictionary ids.
+ *
+ * Two SPI queries are run.  The first fetches the parser name; the second
+ * fetches (token id, dictionary-name array) pairs ordered by token id
+ * descending, so the first row gives the size of cfg->map.  Names are
+ * copied into TopMemoryContext while SPI is connected and are translated
+ * to ids and freed only after SPI_finish().  cfg->map and each dict_id
+ * array are malloc'ed.
+ *
+ * Failures are reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+   Oid arg[2]={ OIDOID, OIDOID };
+   bool isnull;
+   Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+   int stat,i,j;
+   text *ptr;
+   text *prsname=NULL;
+   MemoryContext   oldcontext;
+
+   memset(cfg,0,sizeof(TSCfgInfo));
+   SPI_connect();
+   if ( !plan_getcfg ) {
+       plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+       if ( !plan_getcfg ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getcfg, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       prsname = (text*) DatumGetPointer( 
+           SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) 
+       );
+       /* keep a private copy of the parser name for use after SPI_finish() */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       prsname = ptextdup( prsname );
+       MemoryContextSwitchTo(oldcontext);
+       
+       cfg->id=id;
+   } else 
+       ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+   /* second query: $1 is the parser name (text), $2 is still the cfg oid */
+   arg[0]=TEXTOID;
+   if ( !plan_getmap ) {
+       plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+       if ( !plan_getmap )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   pars[0]=PointerGetDatum( prsname );
+   stat = SPI_execp(plan_getmap, pars, " ", 0);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed <= 0 )
+       ts_error(ERROR, "No parser with id %d", id);
+
+   for(i=0;i<SPI_processed;i++) {
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       /* rows arrive with the largest token id first: size the map from it */
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       /* isnull now refers to the dictionary-name array (column 2) */
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       if ( !cfg->map[lexid].dict_id )
+           ts_error(ERROR,"No memory");
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       /* for now store copies of the dictionary names; ids come later */
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;j<cfg->map[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   /* replace every stored dictionary name by the dictionary id */
+   for(i=0;i<cfg->len;i++) {
+       for(j=0;j<cfg->map[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+typedef struct {
+   TSCfgInfo   *last_cfg;
+   int     len;
+   int     reallen;
+   TSCfgInfo   *list;
+   SNMap       name2id_map;
+} CFGList;
+
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole configuration cache: the name-to-id map, every cached
+ * TSCfgInfo with its token map and dict_id arrays, and the list itself.
+ * CList is left zeroed.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               /* map holds CList.list[i].len entries (see init_cfg) */
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort/bsearch comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly instead of subtracted: Oid is unsigned,
+ * and a difference squeezed into an int has the wrong sign when the two
+ * ids are more than 2^31 apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached TSCfgInfo for configuration id, loading it with
+ * init_cfg() on first use.
+ *
+ * Lookup order: the most recently returned entry, then a binary search of
+ * the id-sorted cache, then a fresh load appended to the cache, which is
+ * re-sorted afterwards.  The returned pointer refers to an element of
+ * CList.list and becomes stale once that array is grown or re-sorted.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+
+   /*
+    * Do not publish the new slot through last_cfg before it is complete:
+    * init_cfg() stores the id early and can still report an error later,
+    * and a half-built slot must not be handed out by the shortcut above
+    * on the next call.
+    */
+   CList.last_cfg=NULL;
+   init_cfg(id, &(CList.list[CList.len]));
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return findcfg(id); /* qsort changed order!! */
+}
+
+
+/*
+ * Translate a configuration name into the oid of its pg_ts_cfg row.
+ * Results are remembered in CList.name2id_map, so the SPI query is run
+ * only for names not seen before.  Raises an error when no row matches or
+ * the oid is null.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid     argtypes[1]={ TEXTOID };
+   Datum   values[1]={ PointerGetDatum(name) };
+   bool    isnull;
+   int     rc;
+   Oid     id;
+
+   /* fast path: the name was resolved earlier */
+   id=findSNMap_t( &(CList.name2id_map), name );
+   if ( id )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed <= 0 )
+       elog(ERROR, "No tsearch config");
+   else {
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull )
+           elog(ERROR, "Null id for tsearch config");
+   }
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(CList.name2id_map), name, id );
+   return id;
+}
+
+
+/*
+ * Split buf (buflen bytes) into lexemes with the parser of cfg, normalise
+ * each lexeme with the dictionaries mapped to its token type, and append
+ * the resulting words to prs->words.
+ *
+ * For every lexeme the dictionaries are tried in order; the first one that
+ * returns a non-NULL array wins.  An array with no entries adds no word
+ * but still advances prs->pos.  The strings returned by the dictionary are
+ * stored in prs->words without copying; only the array holding them is
+ * freed here.
+ *
+ * Raises an error when a lexeme is MAXSTRLEN bytes or longer.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an int, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append one word to prs->words: a palloc'ed copy of the buflen bytes at
+ * buf, tagged with the token type.  All other fields of the new entry are
+ * zero.  The array is doubled as often as needed to make room.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->lenwords <= prs->curwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* safe to take the address now: no reallocation happens below */
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;
+}
+
+/*
+ * Link the most recently added word (prs->words[prs->curwords-1]) to every
+ * query operand equal to the normalised form buf (buflen bytes).
+ *
+ * The first matching operand is stored in the word itself.  For each
+ * further match a copy of the word is appended with repeated set to 1 and
+ * item pointing at that operand.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* make room for the worst case: one extra entry per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only now: repalloc() above may have moved the
+    * array, which would leave an earlier pointer dangling.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Parse buf (buflen bytes) for headline generation.  Every lexeme produced
+ * by the parser of cfg is appended to prs->words as is (hladdword); its
+ * normalised forms are then matched against the operands of query
+ * (hlfinditem).
+ *
+ * Dictionaries mapped to the token type are tried in order and the first
+ * one returning a non-NULL array wins.  The normalised strings and the
+ * array holding them are freed here.
+ *
+ * Raises an error when a lexeme is MAXSTRLEN bytes or longer.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       /* token types beyond the map have no dictionaries: keep raw word only */
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* lenlemm is an int, so it travels as an int4 Datum */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Build the headline text from prs->words.
+ *
+ * Only words flagged "in" and neither "skip" nor "repeated" are emitted.
+ * A word flagged "replace" is written as a single space; any other word is
+ * copied verbatim, wrapped in prs->startsel / prs->stopsel when it is
+ * "selected".  The word buffers of all non-repeated entries are freed
+ * while the result is assembled.  Returns a palloc'ed text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int     cap=128;
+   text    *out=(text*)palloc( cap );
+   char    *ptr=((char*)out) + VARHDRSZ;
+   HLWORD  *wrd;
+
+   for( wrd=prs->words; wrd - prs->words < prs->curwords; wrd++ ) {
+       /* grow until the word plus both markers certainly fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= cap ) {
+           int used = ptr - ((char*)out);
+
+           cap*= 2;
+           out = (text *) repalloc(out, cap);
+           ptr=((char*)out) + used;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace )
+               *ptr++ = ' ';
+           else {
+               if (wrd->selected) {
+                   memcpy(ptr,prs->startsel,prs->startsellen);
+                   ptr+=prs->startsellen;
+               }
+               memcpy(ptr,wrd->word,wrd->len);
+               ptr+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(ptr,prs->stopsel,prs->stopsellen);
+                   ptr+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries share the buffer of the entry they were copied from */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+   }
+
+   VARATT_SIZEP(out)=ptr - ((char*)out);
+   return out; 
+}
+
+/*
+ * Return the id of the current configuration.  If none has been chosen yet
+ * (current_cfg_id is 0), look up the pg_ts_cfg row whose locale column
+ * equals the current LC_CTYPE locale name, remember its oid and return it.
+ * Raises an error when no such row exists.
+ */
+int  
+get_currcfg(void) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1];
+   const char *locname;
+   bool isnull;
+   int rc;
+
+   /* answer from the cached value once it is known */
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   if ( plan_getcfg_bylocale == NULL ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+       if ( plan_getcfg_bylocale == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* setlocale() with a NULL locale only queries the current setting */
+   locname = setlocale(LC_CTYPE, NULL);
+   values[0] = PointerGetDatum( char2text((char*)locname) );
+   rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed > 0 )
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   pfree(DatumGetPointer(values[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * SQL-callable: make the configuration with the given oid the current one.
+ * findcfg() runs first, so current_cfg_id is changed only after the
+ * configuration has been looked up (and loaded if necessary).
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid=PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id=cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * SQL-callable: make the configuration with the given name the current
+ * one.  The name is resolved with name2id_cfg() and the work is delegated
+ * to set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text    *name=PG_GETARG_TEXT_P(0);
+   Oid     cfgid=name2id_cfg(name);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum( cfgid ) );
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* SQL-callable: return the id of the current configuration (see get_currcfg). */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   PG_RETURN_OID( get_currcfg() ); 
+}
+
+/*
+ * SQL-callable: emit the notice "TSearch cache cleaned".
+ * NOTE(review): nothing here calls reset_cfg() directly; presumably
+ * ts_error() is what clears the caches - confirm against its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+/*
+ * Declarations for the tsearch configuration cache (ts_cfg.c) and for the
+ * word arrays filled while parsing text.
+ */
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* Dictionaries mapped to one token type: len entries in dict_id. */
+typedef struct {
+   int len;
+   Datum   *dict_id;
+} ListDictionary;
+
+/*
+ * One cached configuration: its own id, the id of its parser, and a map
+ * with len entries indexed by token type.
+ */
+typedef struct {
+   Oid id;
+   Oid prs_id;
+   int len;
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/*
+ * One normalised word produced by parsetext_v2(): the string, its length
+ * and its position.  parsetext_v2() fills pos.pos and sets alen to 0.
+ */
+typedef struct {
+        uint16          len;
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        char       *word;
+   uint32  alen;
+}       WORD;
+   
+/*
+ * Growable array of WORD: lenwords slots allocated, curwords in use;
+ * pos is the running word position counter.
+ */
+typedef struct {
+        WORD       *words;
+        int4            lenwords;
+        int4            curwords;
+   int4        pos;
+}       PRSTEXT;
+
+/*
+ * One word of the text a headline is built from: the raw string (len
+ * bytes, not NUL-terminated), its token type, the query item it matched
+ * (if any) and the flags consulted by genhl().
+ */
+typedef struct {
+        uint16    len;
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   uint8   type;
+        char      *word;
+   ITEM      *item;
+}       HLWORD;
+   
+/*
+ * Growable array of HLWORD plus the markers written before (startsel) and
+ * after (stopsel) each selected word, with their lengths.
+ */
+typedef struct {
+        HLWORD       *words;
+        int4            lenwords;
+        int4            curwords;
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+/*
+ * Input function for the tsstat type.  The supplied text is not examined;
+ * the result is always an empty statistics object consisting of the header
+ * only (len = STATHDRSIZE, size = 0).
+ */
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+
+Datum
+tsstat_in(PG_FUNCTION_ARGS)
+{
+   tsstat *empty;
+
+   empty = palloc(STATHDRSIZE);
+   empty->size = 0;
+   empty->len = STATHDRSIZE;
+
+   PG_RETURN_POINTER(empty);
+}
+
+/*
+ * Output function for the tsstat type.  Not implemented: it reports
+ * "Unimplemented" at ERROR level.  The trailing return is kept as in the
+ * original so the function has a return statement on every path.
+ */
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+
+Datum
+tsstat_out(PG_FUNCTION_ARGS)
+{
+   elog(ERROR, "Unimplemented");
+
+   PG_RETURN_NULL();
+}
+
+/*
+ * Create or grow an array of WordEntry pointers.
+ *
+ * in  - current array, or NULL when nothing has been allocated yet
+ * len - in/out capacity in elements: set to 8 on the first allocation,
+ *       doubled on every later call
+ *
+ * Returns the array to use from now on (it may have moved).
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       /* existing buffer: double its capacity */
+       *len *= 2;
+       return (WordEntry**) repalloc( in, sizeof(WordEntry*) * (*len) );
+   }
+
+   /* first use: start with room for eight pointers */
+   *len = 8;
+   return (WordEntry**) palloc( sizeof(WordEntry*) * (*len) );
+}
+
+/*
+ * Order a StatEntry (text stored in `stat`) against a WordEntry (text stored
+ * in `txt`).  Words of different length are ordered by length, shorter
+ * first; words of equal length are ordered by strncmp() over that length.
+ *
+ * Returns a negative value, 0, or a positive value.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(
+       STATSTRPTR(stat) + a->pos,
+       STRPTR(txt) + b->pos,
+       a->len
+   );
+}
+
+/*
+ * Initialise the statistics entry `dst` (which lives in `newstat`) for a word
+ * seen for the first time: the word text is copied from `txt` to `curptr`,
+ * ndoc starts at 1 and nentry at the word's position count (at least 1).
+ *
+ * Returns the address just past the copied text, i.e. where the next new
+ * word should be written.
+ */
+static char*
+fillNewStatEntry(StatEntry *dst, tsstat *newstat, tsvector *txt, WordEntry *src, char *curptr) {
+   dst->nentry=POSDATALEN(txt,src);
+   if ( dst->nentry==0 )
+       dst->nentry=1;
+   dst->ndoc=1;
+   dst->len=src->len;
+   memcpy(curptr, STRPTR(txt) + src->pos, dst->len);
+   dst->pos = curptr - STATSTRPTR(newstat);
+   return curptr + dst->len;
+}
+
+/*
+ * Build a new tsstat holding every entry of `stat` plus the `len` words of
+ * `txt` listed in `entry`.
+ *
+ * stat  - existing statistics; its entry array is searched/merged using
+ *         compareStatWord() order
+ * txt   - tsvector that owns the words referenced by `entry`
+ * entry - array of `len` pointers to words of `txt` that are not yet in
+ *         `stat` (assumes they are in the same order as the stat entries --
+ *         TODO confirm; the only caller visible here, ts_accum, collects
+ *         them in tsvector order)
+ *
+ * The text of the old words is copied unchanged to the start of the new
+ * string area, so old `pos` offsets stay valid; the text of the new words is
+ * appended after it.  Returns the newly palloc'd object; `stat` itself is
+ * neither modified nor freed.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* total text length of the words being added */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* newstat->size must already be set: STATSTRPTR() depends on it */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary-search its slot, copy the two halves around it */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       (void) fillNewStatEntry(nptr, newstat, txt, *ptr, curptr);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               curptr = fillNewStatEntry(nptr, newstat, txt, *ptr, curptr);
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* whatever is left of the old entries (possibly nothing) */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* whatever is left of the new words (possibly nothing) */
+       while(ptr-entry<len) {
+           curptr = fillNewStatEntry(nptr, newstat, txt, *ptr, curptr);
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+/*
+ * ts_accum(tsstat, tsvector) - fold one tsvector into accumulated word
+ * statistics.
+ *
+ * Argument 0 is the statistics gathered so far (NULL means "start empty"),
+ * argument 1 the tsvector to add.  For every word of the tsvector that is
+ * already present, ndoc is incremented and nentry grows by the word's
+ * position count (at least 1); this is done in place on the passed object.
+ * Words not yet present are collected and added by formstat(), which builds
+ * a new object.
+ *
+ * Returns the passed (or freshly initialised) object when no new word was
+ * found, otherwise the object built by formstat().  The old object is not
+ * freed here.
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat;
+   tsvector  *txt;
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   stat = PG_ARGISNULL(0) ? NULL : (tsstat*)PG_GETARG_POINTER(0);
+   if ( stat==NULL ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /*
+    * The NULL test has to come before detoasting: detoasting a NULL datum
+    * would dereference a null pointer.
+    */
+   if ( PG_ARGISNULL(1) )
+       PG_RETURN_POINTER(stat);
+
+   txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+
+   /* simple check of correctness */
+   if ( txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: a linear merge of both word lists, or, when the
+    * statistics are at least 100 times larger than the document, a binary
+    * search per document word.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* document words beyond the last known word are all new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* the search range collapses only when no match caused a break */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * State kept between calls of the set-returning functions:
+ *   cur  - index of the next StatEntry to return
+ *   stat - private copy of the statistics being returned.  It is filled by
+ *          copying a tsstat object, so it is declared as tsstat.
+ */
+typedef struct {
+   uint32  cur;
+   tsstat  *stat;
+} StatStorage;
+
+/*
+ * First-call setup shared by the set-returning functions.  While switched to
+ * the multi-call memory context it:
+ *   - allocates a StatStorage holding a byte-for-byte copy of `stat` and
+ *     stores it in funcctx->user_fctx,
+ *   - looks up the tuple descriptor of "statinfo" and derives the slot and
+ *     attribute input metadata from it.
+ * The previous memory context is restored before returning.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext   callerctx;
+   StatStorage     *state;
+   TupleDesc       rowdesc;
+
+   callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = palloc( sizeof(StatStorage) );
+   state->cur = 0;
+   state->stat = palloc( stat->len );
+   memcpy(state->stat, stat, stat->len);
+   funcctx->user_fctx = (void*) state;
+
+   rowdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(rowdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+   MemoryContextSwitchTo(callerctx);
+}
+
+
+/*
+ * Produce the next result row from the state stored in funcctx->user_fctx.
+ *
+ * While entries remain, builds a (word, ndoc, nentry) tuple from the current
+ * StatEntry via its C-string representation, advances the cursor and returns
+ * the tuple as a Datum.  Once every entry has been returned, frees the
+ * stored statistics and the state itself and returns (Datum)0.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *state = (StatStorage*) funcctx->user_fctx;
+   StatEntry   *item;
+   HeapTuple   tuple;
+   Datum       result;
+   char        *values[3];
+   char        ndoc[16];
+   char        nentry[16];
+
+   if ( !( state->cur < state->stat->size ) ) {
+       /* nothing left: release the per-query state */
+       pfree(state->stat);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   item = STATPTR(state->stat) + state->cur;
+
+   /* column 0: the word, copied into a NUL-terminated buffer */
+   values[0] = palloc( item->len+1 );
+   memcpy( values[0], STATSTRPTR(state->stat)+item->pos, item->len);
+   (values[0])[item->len] = '\0';
+
+   /* columns 1 and 2: the counters in text form */
+   sprintf(ndoc,"%d",item->ndoc);
+   values[1] = ndoc;
+   sprintf(nentry,"%d",item->nentry);
+   values[2] = nentry;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[0]);
+   state->cur++;
+
+   return result;
+}
+
+/*
+ * Set-returning function that returns the contents of the tsstat passed as
+ * argument 0, one row per call.  The first call copies the statistics into
+ * multi-call storage; each call then returns the next row produced by
+ * ts_process_call(), and the set ends when that returns (Datum)0.
+ */
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+
+Datum
+ts_accum_finish(PG_FUNCTION_ARGS)
+{
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(funcctx);
+   if ( row != (Datum)0 ) {
+       SRF_RETURN_NEXT(funcctx, row);
+   }
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* Oid of the tsvector type; InvalidOid until get_ti_Oid() has looked it up */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the Oid of the type named 'tsvector' in pg_type through SPI and
+ * store it in tiOid.  The caller must already be connected to SPI.
+ *
+ * Raises an ERROR when the query fails, when it returns no row, or when the
+ * Oid found is InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is a row count, so "no row" must be tested as < 1; a
+    * "< 0" test can never fire and would let vals[0] be read from an empty
+    * result.
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL query given in `txt` and accumulate word statistics over its
+ * result.
+ *
+ * The query must return exactly one column of type tsvector; otherwise an
+ * ERROR is raised.  Rows are fetched through an SPI cursor in batches of 100
+ * and every non-NULL value is folded in with ts_accum().  The caller must
+ * already be connected to SPI.
+ *
+ * Returns the accumulated tsstat (empty when the query returns no rows).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   /* one iteration per fetched batch; an empty batch ends the scan */
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum returns its input when nothing new was added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+/*
+ * Set-returning function taking the text of an SQL query as argument 0.
+ * On the first call the query is run through SPI by ts_stat_sql() and the
+ * resulting statistics are copied into multi-call storage before the SPI
+ * connection is closed.  Each call then returns the next row produced by
+ * ts_process_call(); the set ends when that returns (Datum)0.
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+
+Datum
+ts_stat(PG_FUNCTION_ARGS)
+{
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *sql=PG_GETARG_TEXT_P(0);
+       tsstat  *collected;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       collected = ts_stat_sql(sql);
+       PG_FREE_IF_COPY(sql,0);
+       /* copy the result out before SPI_finish() is called */
+       ts_setup_firstcall(funcctx, collected );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = ts_process_call(funcctx);
+   if ( row != (Datum)0 ) {
+       SRF_RETURN_NEXT(funcctx, row);
+   }
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Statistics for one distinct word.
+ *   len    - length of the word text in bytes
+ *   pos    - offset of the word text from the start of the string area
+ *            (STATSTRPTR) of the owning tsstat
+ *   ndoc   - incremented once for every tsvector that contains the word
+ *   nentry - sum over those tsvectors of the word's position count, each
+ *            tsvector counting at least 1
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Accumulated word statistics, stored as one contiguous block.
+ *   len  - total size of the object in bytes
+ *   size - number of StatEntry items
+ *   data - the StatEntry array (STATPTR) immediately followed by the text
+ *          of all words (STATSTRPTR)
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat.  All macros accept a pointer of any type and
+ * interpret it as tsstat; arguments are parenthesized so that expressions
+ * can be passed safely.
+ *
+ * STATHDRSIZE        - size of the fixed header (the len and size fields)
+ * CALCSTATSIZE(x, l) - total bytes needed for x entries plus l bytes of text
+ * STATPTR(x)         - first StatEntry of x
+ * STATSTRPTR(x)      - start of the word text, right after the entry array
+ * STATSTRSIZE(x)     - number of bytes of word text stored in x
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * a string of lexeme values, plus an array giving each lexeme's position in that string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>          /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * Function-manager declarations for the SQL-callable entry points defined
+ * later in this file.  Each function is announced with PG_FUNCTION_INFO_V1
+ * and prototyped with the uniform Datum f(PG_FUNCTION_ARGS) signature.
+ */
+
+/* text input / output of the tsvector type */
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+/* text -> tsvector: by configuration id, current configuration, or name */
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+/* row-level trigger that fills a tsvector column */
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+/* number of entries stored in a tsvector */
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+
+/*
+ * qsort comparator: orders WordEntryPos items by ascending position.
+ *
+ * Items with the same position compare as equal (0).  Reporting "greater"
+ * for both (a,b) and (b,a) would make the comparator inconsistent, and
+ * qsort's behaviour with an inconsistent comparator is undefined.  The
+ * weight is ignored here; uniquePos() merges the weights of equal positions
+ * after sorting.
+ */
+static int 
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort a position array by position and collapse repeated positions in
+ * place; a repeated position keeps the largest weight seen for it.
+ *
+ * a: array of l items, rewritten in place.
+ * Returns the number of items kept.  Scanning stops early once MAXNUMPOS
+ * items are kept or the item just stored holds position MAXENTRYPOS-1.
+ * A one-element array is returned untouched.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l)
+{
+   int4        kept = 0;       /* index of the last distinct item kept */
+   int4        scan;
+
+   if (l == 1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (scan = 1; scan < l; scan++)
+   {
+       if ( a[scan].pos == a[kept].pos )
+       {
+           /* same position again: only the weight may be upgraded */
+           if ( a[scan].weight > a[kept].weight )
+               a[kept].weight = a[scan].weight;
+           continue;
+       }
+
+       kept++;
+       a[kept].pos = a[scan].pos;
+       a[kept].weight = a[scan].weight;
+
+       /* size cap reached, or nothing can sort after the maximal position */
+       if ( kept >= MAXNUMPOS-1 || a[kept].pos == MAXENTRYPOS-1 )
+           break;
+   }
+
+   return kept + 1;
+}
+
+/* Lexeme text buffer read by compareentry(); uniqueentry() sets it before sorting. */
+static char *BufferStr;
+
+/*
+ * qsort comparator for WordEntryIN items: shorter lexemes sort first,
+ * lexemes of equal length are ordered by strncmp() over their bytes in
+ * BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *lhs = (const WordEntryIN *) a;
+   const WordEntryIN *rhs = (const WordEntryIN *) b;
+
+   if ( lhs->entry.len != rhs->entry.len )
+       return ( lhs->entry.len > rhs->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[lhs->entry.pos],
+                  &BufferStr[rhs->entry.pos],
+                  lhs->entry.len);
+}
+
+/*
+ * Sort the parsed entries, merge duplicate lexemes (concatenating and then
+ * de-duplicating their position lists) and compute the space the lexeme and
+ * position data will occupy in the final tsvector.
+ *
+ * a:         array of l parsed entries, rewritten in place
+ * buf:       buffer holding the lexeme bytes that entry.pos refers to
+ * outbuflen: output only; receives the exact number of data bytes needed.
+ *            Per entry this is SHORTALIGN(len) and, when positions are
+ *            present, (count + 1) WordEntryPos slots, since tsvector_in()
+ *            copies the count slot together with the positions.
+ *
+ * Returns the number of unique entries.
+ *
+ * The incoming value of *outbuflen is ignored.  It used to be added to,
+ * and the count slot was not included, so the result was only large enough
+ * when the caller's scratch-buffer size happened to cover the difference.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   *outbuflen = 0;
+   if (l <= 0)
+       return 0;
+
+   res = a;
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen += (*(uint16*)(a->pos) + 1) * sizeof(WordEntryPos);
+       }
+       *outbuflen += SHORTALIGN(a->entry.len);
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* *res is complete: account for it, then start the next entry */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           if ( res != ptr )   /* memcpy must not be given overlapping areas */
+               memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* duplicate lexeme: fold its positions into *res */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* *res had no positions yet: adopt the duplicate's array */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* account for the last unique entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/*
+ * States of the gettoken_tsvector() scanner.
+ */
+/* before a lexeme: blanks are skipped, a quote opens a quoted lexeme */
+#define WAITWORD   1
+/* inside an unquoted lexeme */
+#define WAITENDWORD 2
+/* the character after a backslash: stored literally */
+#define WAITNEXTCHAR   3
+/* inside a quoted lexeme, up to the closing quote */
+#define WAITENDCMPLX   4
+/* right after a quoted lexeme: ':' opens the position list */
+#define WAITPOSINFO    5
+/* expecting the first digit of a position number */
+#define INPOSINFO  6
+/* after that digit: further digits, a weight letter, ',' or end of token */
+#define WAITPOSDELIM   7
+
+/*
+ * Double the size of state->word once the bytes already used plus one reach
+ * state->len, and re-base state->curpos onto the reallocated buffer.
+ * Expects a TI_IN_STATE pointer named "state" to be in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Scan the next lexeme, with its optional position list, from state->prsbuf.
+ *
+ * On success returns 1: the lexeme is in state->word, NUL-terminated, with
+ * state->curpos pointing at the terminator.  If positions were given,
+ * state->alen is non-zero and state->pos holds them: slot 0 is used as a
+ * uint16 count, the positions follow from slot 1.  Returns 0 when the input
+ * is exhausted before a lexeme starts.  Malformed input raises elog(ERROR).
+ *
+ * When state->oprisdelim is set, ISOPERATOR characters end an unquoted
+ * lexeme and position lists are not parsed.
+ *
+ * The <ctype.h> classifiers are called on (unsigned char) values: passing a
+ * plain char with the high bit set is undefined behaviour where char is
+ * signed.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           /* escaped character: stored as is, then back to the saved state */
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               /* delimiter is left unconsumed for the next call */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* consume the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               /* make room for one more position (slot 0 is the count) */
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi reads the whole number; its other digits are skipped in WAITPOSDELIM */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type: parse the text form into a tsvector.
+ *
+ * Tokens are collected with gettoken_tsvector(), their bytes are gathered
+ * in a scratch buffer, uniqueentry() sorts and merges them, and the result
+ * is laid out as header, WordEntry array, then lexeme/position data.
+ *
+ * Raises elog(ERROR) for a lexeme of MAXSTRLEN bytes or more, and for any
+ * lexeme offset that does not fit the 20-bit WordEntry.pos field.  That is
+ * an offset >= MAXSTRPOS: offset MAXSTRPOS itself would wrap to 0, so the
+ * comparison is ">=", and the offset in the final layout is checked too.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array and the scratch buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       if (cur - tmpbuf >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           /* take over the position array built by the scanner */
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* empty value: header only, no data area */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       /* final offsets include alignment padding and position data */
+       if (cur - STRPTR(in) >= MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* count slot plus positions are copied as one run */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * SQL-callable: return the number of entries (the size field) of a tsvector.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before a detoasted copy, if any, is released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type: build the text representation.
+ *
+ * Each lexeme is written between single quotes, entries are separated by a
+ * blank, and a lexeme with positions is followed by ':' and a
+ * comma-separated list of positions, each optionally suffixed with its
+ * weight letter (A, B or C; weight 0 prints no letter).
+ *
+ * Both the quote and the backslash are escaped with a backslash inside a
+ * lexeme.  gettoken_tsvector() treats a backslash as an escape prefix, so
+ * leaving it unescaped would make the output read back as a different
+ * value.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   /* upper bound on the output size */
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += ptr[i].len*2 /*for escape */;
+       if ( ptr[i].haspos )
+           /* up to 5 digits, a weight letter and a separator per position */
+           lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'' || *curin == '\\')
+           {
+               int4        pos = curout - outbuf;
+
+               /* one more byte for the escape character */
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort comparator for WORD items: by length first, then by the lexeme
+ * bytes, then by position.
+ *
+ * Two items with the same lexeme and the same position compare as equal
+ * (0).  Returning -1 for both orderings of such a pair would make the
+ * comparator inconsistent, which qsort does not allow.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int         res;
+
+   if ( wa->len != wb->len )
+       return ( wa->len > wb->len ) ? 1 : -1;
+
+   res = strncmp(wa->word, wb->word, wb->len);
+   if ( res != 0 )
+       return res;
+
+   if ( wa->pos.pos == wb->pos.pos )
+       return 0;
+   return ( wa->pos.pos > wb->pos.pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the parsed words and merge duplicates in place.
+ *
+ * On return every kept WORD carries a palloc'd position array in pos.apos:
+ * element 0 is the number of positions, the positions follow from element 1,
+ * and alen is the allocated number of elements.  Each position is clamped
+ * with LIMITPOS.  The word string of a merged duplicate is pfree'd.
+ *
+ * Returns the number of unique words.
+ *
+ * NOTE(review): pos.pos is always read into tmppos before pos.apos is
+ * assigned, which suggests the two members share storage -- confirm against
+ * the WORD declaration before reordering any of these statements.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   /* single word: nothing to sort, just convert its position to an array */
+   if (l == 1) {
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   /* first sorted word opens the first result entry */
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* new lexeme: start the next result entry */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* same lexeme again: drop its string, append its position */
+           pfree(ptr->word);
+           /* no append once the count cap or the maximal position is reached */
+           if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
+               res->pos.apos[0]++; 
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * Builds a tsvector from the words collected in prs: the words are made
+ * unique with uniqueWORD(), the needed size is computed, and the header,
+ * WordEntry array and lexeme/position data are filled in.  Every position
+ * is stored with weight 0.
+ *
+ * The word strings, the position arrays and prs->words itself are pfree'd;
+ * prs must not be used for its words afterwards.
+ *
+ * Raises elog(ERROR) when a lexeme offset does not fit the 20-bit
+ * WordEntry.pos field, i.e. is >= MAXSTRPOS; offset MAXSTRPOS itself would
+ * wrap to 0, hence ">=".
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* count first, then the positions right behind it */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * SQL-callable: convert text (argument 1) to a tsvector using the
+ * configuration whose id is argument 0.  Text that yields no words
+ * produces a header-only tsvector with size 0.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *in = PG_GETARG_TEXT_P(1);
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     prs;
+   tsvector   *out;
+
+   /* start with room for 32 words */
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+   PG_FREE_IF_COPY(in, 1);
+
+   if (prs.curwords == 0)
+   {
+       /* nothing parsed: release the word array, return an empty value */
+       pfree(prs.words);
+       out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       PG_RETURN_POINTER(out);
+   }
+
+   /* makevalue() consumes and frees prs.words */
+   out = makevalue(&prs);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * SQL-callable: to_tsvector() with the configuration given by name
+ * (argument 0); the name is resolved with name2id_cfg() and the work is
+ * delegated to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS)
+{
+   text       *cfg = PG_GETARG_TEXT_P(0);
+   Datum       res;
+
+   res = DirectFunctionCall3(to_tsvector,
+                             Int32GetDatum( name2id_cfg( cfg ) ),
+                             PG_GETARG_DATUM(1),
+                             (Datum)0);
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * SQL-callable: to_tsvector() on argument 0 using the configuration
+ * returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS)
+{
+   Datum       res;
+
+   res = DirectFunctionCall3(to_tsvector,
+                             Int32GetDatum( get_currcfg() ),
+                             PG_GETARG_DATUM(0),
+                             (Datum)0);
+
+   PG_RETURN_DATUM(res);
+}
+
+/*
+ * Look up a one-argument function by unqualified name and return the OID of
+ * the first candidate whose argument type is text, or InvalidOid when there
+ * is none.  The candidate list returned by FuncnameGetCandidates() is freed.
+ */
+static Oid
+findFunc(char *fname)
+{
+   List               *qualname = makeList1(makeString(fname));
+   FuncCandidateList   cand = FuncnameGetCandidates(qualname, 1);
+   Oid                 found = InvalidOid;
+
+   freeList(qualname);
+
+   while (cand != NULL)
+   {
+       FuncCandidateList following = cand->next;
+
+       /* only the first text-argument candidate is remembered */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+
+       pfree(cand);
+       cand = following;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * Row-level BEFORE INSERT/UPDATE trigger that rebuilds a tsvector column
+ * from one or more character columns of the same row.
+ *
+ * Trigger arguments: tgargs[0] names the tsvector column; each following
+ * argument names either a column of type text, varchar or char, or (when no
+ * such column exists) a function found by findFunc().  The words are parsed
+ * with the current configuration.  Returns the modified tuple; misuse and
+ * lookup failures are reported with elog(ERROR).
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   /* only valid as a row-level BEFORE trigger on INSERT or UPDATE */
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* the tuple to modify: the inserted row, or the new version on update */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* first argument: the tsvector column to fill */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /*
+            * Not a column: treat the argument as a function name.
+            * NOTE(review): funcoid is never reset, so the function is
+            * applied to every column named after it -- confirm intended.
+            */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           /* non-character columns are skipped with a warning, not an error */
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* pass the column value through the user function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /* no detoasted copy was made: remember that so txt is not freed below */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when detoasting produced a separate copy */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       /* makevalue() frees prs.words; the built value is freed after use */
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a header-only tsvector with size 0 */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Directory entry for one lexeme, packed into 32 bits.
+ *   haspos - set when position data follows the lexeme bytes
+ *   len    - length of the lexeme in bytes
+ *   pos    - byte offset of the lexeme, counted from STRPTR()
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/*
+ * Each limit is one more than the largest value its bit-field can hold
+ * (len: 11 bits, pos: 20 bits), so valid values are strictly below it.
+ */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme, packed into 16 bits.
+ *   weight - 0..3; the text form prints 3 as A, 2 as B, 1 as C, 0 as nothing
+ *   pos    - position of the occurrence (14 bits)
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+/* one more than the largest position the 14-bit field can hold */
+#define MAXENTRYPOS    (1<<14)
+/* cap on the number of positions kept per lexeme */
+#define MAXNUMPOS  256
+/* clamp x to MAXENTRYPOS-1; note that x is evaluated more than once */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * Layout of a tsvector value: two int4 header fields followed by the
+ * variable-length data area.
+ */
+typedef struct
+{
+   /* total size of the value in bytes, as computed by CALCDATASIZE() */
+   int4        len;
+   /* number of WordEntry items in the data area */
+   int4        size;
+   /* WordEntry array, then lexeme bytes and position data (see ARRPTR/STRPTR) */
+   char        data[1];
+}  tsvector;
+
+/*
+ * Accessors for the tsvector layout.  Every macro argument is parenthesized
+ * so that an expression argument (e.g. "p + 1" or "a - b") is evaluated as
+ * a unit inside the expansion.
+ *
+ *   DATAHDRSIZE       size of the two int4 header fields
+ *   CALCDATASIZE      total size for x entries plus lenstr data bytes
+ *   ARRPTR            start of the WordEntry array
+ *   STRPTR            start of the lexeme/position data, just behind the array
+ *   STRSIZE           size of the lexeme/position data
+ *   _POSDATAPTR       address of entry e's position block (uint16 count first)
+ *   POSDATALEN        number of positions of entry e, 0 when it has none
+ *   POSDATAPTR        first WordEntryPos of entry e, just behind the count
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Working form of an entry while a tsvector is being parsed from text.
+ *   entry - the directory entry being built
+ *   pos   - palloc'd position array, valid when entry.haspos is set;
+ *           slot 0 is used as a uint16 count, positions start at slot 1
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * Scanner state for gettoken_tsvector().
+ */
+typedef struct
+{
+   /* read cursor in the input string */
+   char       *prsbuf;
+   /* buffer receiving the current lexeme; the scanner may repalloc it */
+   char       *word;
+   /* write cursor inside word */
+   char       *curpos;
+   /* allocated size of word in bytes */
+   int4        len;
+   /* current scanner state */
+   int4        state;
+   /* allocated slots in pos; 0 when the token carries no positions */
+   int4        alen;
+   /* positions of the current token; slot 0 is used as a uint16 count */
+   WordEntryPos    *pos;
+   /* when true, operator characters end a lexeme and positions are not parsed */
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+/* returns 1 when a token was read into *state, 0 at end of input */
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>          /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/*
+ * Function-manager declarations for the SQL-callable operations defined in
+ * this file.
+ */
+
+/* remove all position data from a tsvector */
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+/* assign one weight to every position of a tsvector */
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+/* merge two tsvectors into one */
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * SQL-callable: return a copy of the tsvector with all position data
+ * removed.  The lexemes are kept in their original order, each padded to a
+ * SHORTALIGN boundary, and every entry has haspos cleared.
+ *
+ * Both loops run over the in->size entries of the input.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* space needed for the lexeme bytes alone */
+   for(i=0;i<in->size;i++) 
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * SQL-callable: return a copy of the tsvector (argument 0) in which every
+ * position carries the weight named by the character in argument 1.
+ * Accepted letters, in either case: a -> 3, b -> 2, c -> 1, d -> 0.
+ * Any other character raises elog(ERROR, "Unknown weight").
+ *
+ * The character is converted to unsigned char before tolower(): calling a
+ * <ctype.h> function with a negative char value is undefined behaviour.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   /* work on a private copy; the layout itself does not change */
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;    
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two entries that may live in different tsvectors: ptra and ptrb
+ * are the lexeme data areas that a->pos and b->pos are relative to.
+ * Shorter lexemes sort first; equal lengths are decided by strncmp().
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of entry srcptr (in src) to the position block of
+ * entry destptr (in dest), shifting each position by maxpos and clamping it
+ * with LIMITPOS.  Weights are copied unchanged.  If destptr has no
+ * positions yet, its count is started at 0; haspos is set once at least one
+ * position has been added.
+ *
+ * Returns the number of positions appended.
+ *
+ * NOTE(review): the loop header was truncated in this copy of the source.
+ * It is reconstructed to stop at the end of the source list, at MAXNUMPOS
+ * stored positions, or once the last stored position is MAXENTRYPOS-1,
+ * the same limits uniquePos() applies.  Confirm against the original file.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+
+/*
+ * SQL-callable: merge two tsvectors into a new one.
+ *
+ * Entries are merged in compareEntry() order (presumably both inputs are
+ * already stored in that order -- confirm).  Entries of the first argument
+ * keep their position data as is; positions coming from the second argument
+ * are shifted by the largest position found in the first (see add_pos()).
+ * A lexeme present in both inputs yields one entry holding both lists.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* pass 1: largest position used anywhere in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /*
+    * Allocate for the worst case (no common lexemes).  out->size is set to
+    * the maximal entry count for now, so STRPTR(out) leaves room for the
+    * largest possible entry array; both are corrected after the merge.
+    */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   /* pass 2: ordered merge of the two entry arrays */
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* count and positions of in1 are copied unchanged */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* positions of in2 are shifted by maxpos; none added means no position block */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: one entry, both position lists */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count slot already exists, only positions are added */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* leftovers of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* leftovers of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Fix up the header with the real entry count.  If that moved STRPTR(out)
+    * (fewer entries than reserved), slide the data down to the new start.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- WARNING: uninstall script for tsearch2.
+-- The DROP ... CASCADE statements below drop all indices, triggers and
+-- columns that use the types defined in tsearch2.sql.
+-- Everything runs inside one transaction (BEGIN ... END).
+
+
+-- GiST operator class for tsvector
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregate
+DROP AGGREGATE stat(tsvector);
+
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- data types; CASCADE also removes the objects that depend on them
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- functions dropped explicitly (dictionary, parser, configuration and
+-- GiST support functions)
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of every token type.  The array is indexed by
+ * the token type numbers defined in deflex.h (1 .. LASTNUM); element 0 is an
+ * empty placeholder because no token type has number 0.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of every token type, indexed like lex_descr by the token type
+ * numbers defined in deflex.h (1 .. LASTNUM); element 0 is an empty
+ * placeholder.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Token type numbers returned by the default word parser.
+ *
+ * LASTNUM is the highest token type number.  Remember to update it (and the
+ * lex_descr / tok_alias tables in deflex.c) whenever a token type is added.
+ */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* description and alias tables, indexed by the token type numbers above */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Interface of the default word parser generated from parser.l.
+ */
+
+/*
+ * Text of the token most recently returned by tsearch2_yylex().
+ * Declared extern because parser.l supplies the definition
+ * ("char *token = NULL;"); a plain "char *token;" in this header would
+ * create an additional definition in every file that includes it.
+ */
+extern char *token;
+
+/*
+ * Length of the current token as set by the scanner rules.
+ * NOTE(review): this is still a tentative definition repeated in every
+ * including file.  It should become "extern int tokenlen;" once a single
+ * defining file (e.g. parser.l) contains "int tokenlen;" -- confirm and
+ * change both places together.
+ */
+int            tokenlen;
+
+/* scan the next token; returns its type number (see deflex.h) */
+int            tsearch2_yylex(void);
+
+/* start scanning "limit" bytes of the given string */
+void       start_parse_str(char *, int);
+
+/* start scanning a file; a limit of 0 means read without limit */
+void       start_parse_fh(FILE *, int);
+
+/* release the scan buffer and the saved token copy */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/*
+ * Avoid exit() on fatal scanner errors: every three-argument fprintf() call
+ * in this file is redirected to ts_error(ERROR, ...).
+ */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* use the postgres allocation functions instead of the libc ones */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to the current token */
+char *s     = NULL;  /* private copy of a whole URL or hyphenated word (see the rules calling strdup) */
+
+YY_BUFFER_STATE buf = NULL; /* current scan buffer; created by start_parse_str/start_parse_fh */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the filehandle ( -1 - unlimited read, 0 - report end of input ) */
+int bytestoread = 0;   /* size of the next fread() request */
+
+/*
+ * Replacement for the default YY_INPUT that can stop after lrlimit bytes.
+ * Interactive buffers are read character by character up to a newline.
+ * Otherwise: lrlimit == 0 yields YY_NULL; lrlimit > 0 reads at most lrlimit
+ * bytes and decreases lrlimit; lrlimit < 0 reads max_size bytes.
+ * Note that the fread() below runs in both of the last two cases: it is not
+ * part of the "else" branch even though it is indented like it.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+/* scanner options: 8-bit input, non-interactive, no unput(), no yywrap() */
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* start condition used while splitting a hyphenated word into its parts */
+%x DELIM  
+/* start conditions used while parsing a URL */
+%x URL  
+%x SERVER  
+
+/* start conditions used while skipping tags, quoted tag text, comments and scripts */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/*
+ * Character classes.  Bytes \200-\377 are treated as non-latin (cyrillic
+ * koi8) letters.
+ */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+/*
+ * NOTE(review): in this copy of the file the rules section looks damaged.
+ * Two rules have the empty pattern "", one line consists of a lone quote,
+ * and no rule carries a start-condition prefix although the conditions
+ * DELIM, URL, SERVER, INTAG, QINTAG, INCOMMENT and INSCRIPT are declared
+ * and entered with BEGIN.  Presumably the prefixes and some pattern text
+ * were lost when the file was rendered; verify every rule against the
+ * original parser.l before building.
+ */
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   /* back to the normal state; the match is replaced by one blank and
+    * reported as SPACE (tokenlen keeps the length of the original match) */
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   /* same handling as the rule above */
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   /* end of a tag: report a single blank with token type TAG */
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   /* named HTML entity */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   /* numeric HTML entity with one to three digits */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   /* number with an exponent part introduced by e, E, d or D */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   /* at least three dot-separated groups of digits */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   /* the sign is mandatory here; unsigned digit runs match the next rule */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   /* protocol prefix; what follows is scanned in the URL state */
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   /* reported with the same token type as http:// */
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   /* return the whole URL from a private copy in s, then push the match
+    * back with yyless(0) so that it is scanned again in the SERVER state */
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   /* return the whole hyphenated word from a private copy in s, then push
+    * the match back with yyless(0) so that its parts are scanned again in
+    * the DELIM state */
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   /* NOTE(review): same pattern as the earlier VERSIONNUMBER rule;
+    * presumably this rule and the following ones up to the "return in
+    * basic state" rule carried a start-condition prefix (DELIM?) in the
+    * original -- confirm */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   /* the hyphen between two parts is reported as SPACE */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   /* anything else: switch back to INITIAL and scan the character again */
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   /* any other single character is reported as SPACE */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after parsing: drop the saved token copy (if any) and delete the
+ * current scan buffer.
+ */
+void end_parse() {
+   if ( s != NULL ) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+} 
+
+/*
+ * Begin scanning "limit" bytes starting at "str".  A scan that is still
+ * open is closed first.
+ */
+void start_parse_str(char* str, int limit) {
+   if ( buf != NULL )
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+/*
+ * Begin scanning from the filehandle "fh".  "limit" is the number of bytes
+ * to read; 0 means no limit (stored as -1 in lrlimit).  A scan that is
+ * still open is closed first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if ( buf != NULL )
+       end_parse();
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+/* saved SPI plan used by init_prs(); prepared on first use */
+static void *plan_getparser=NULL;
+/* oid of the current parser; InvalidOid until one has been selected */
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser whose pg_ts_parser row has
+ * oid "id".
+ *
+ * The row is fetched through SPI with a plan that is prepared once and then
+ * kept in plan_getparser.  The function lookup data of the start, nexttoken,
+ * end and headline functions is stored in TopMemoryContext; the oid of the
+ * lextype function is stored as is.  An error is reported through
+ * ts_error() when the plan cannot be prepared, the query fails or no such
+ * parser exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   /* start from a zeroed entry; prs_id is only set after a successful lookup */
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned: print it with %u so that large oids are not shown negative */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/*
+ * Cache of the parsers looked up so far (see findprs and name2id_prs).
+ */
+typedef struct {
+   WParserInfo *last_prs;      /* entry returned by the previous lookup, or NULL */
+   int     len;                /* number of entries used in list */
+   int     reallen;            /* number of entries allocated for list */
+   WParserInfo *list;          /* entries, kept sorted by prs_id for bsearch */
+   SNMap       name2id_map;    /* parser name -> parser oid */
+} PrsList;
+
+/* the parser cache of this file; it starts out empty */
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Throw away the whole parser cache: the name -> oid map, the array of
+ * cached parsers, and finally every field of PList.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL )
+       free(PList.list);
+
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort/bsearch comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is an
+ * unsigned type, so the difference of two ids converted to int has the wrong
+ * sign whenever the ids are more than INT_MAX apart.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   if ( ida > idb )
+       return 1;
+   return 0;
+}
+
+/*
+ * Return the cached description of the parser with oid "id", loading it with
+ * init_prs() on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search in the
+ * sorted cache, then a fresh load.  A new entry is appended, the array is
+ * sorted again and the entry is looked up once more, because sorting may
+ * have moved it.
+ */
+WParserInfo *
+findprs(Oid id) {
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* already used prs */
+   if ( PList.len != 0 ) {
+       WParserInfo key;
+       key.prs_id=id;
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* last chance */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /*
+    * Do not point last_prs at the new slot before it is filled: init_prs()
+    * zeroes the slot first and reports an error when the parser does not
+    * exist.  If that error leaves this function early, a last_prs pointing
+    * at the zeroed slot would make a later findprs(InvalidOid) return an
+    * entry without valid function data.
+    */
+   PList.last_prs = NULL;
+   init_prs(id, &(PList.list[PList.len]));
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return findprs(id); /* qsort changed order!! */
+}
+
+/* saved SPI plan used by name2id_prs(); prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * Names resolved earlier are answered from PList.name2id_map.  Otherwise the
+ * oid is selected through SPI and remembered in the map.  An error is
+ * reported through ts_error() when the plan cannot be prepared, the query
+ * fails or no parser has that name.
+ */
+Oid
+name2id_prs(text *name) {
+   Oid     argtypes[1]={ TEXTOID };
+   Datum   args[1]={ PointerGetDatum(name) };
+   bool    isnull;
+   int     rc;
+   Oid     prsid;
+
+   /* already known? */
+   prsid = findSNMap_t( &(PList.name2id_map), name );
+   if ( prsid )
+       return prsid;
+
+   SPI_connect();
+
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, args, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed > 0 )
+       prsid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(PList.name2id_map), name, prsid );
+   return prsid;
+}
+
+
+/******sql-level interface******/
+/* state kept between the calls of the token_type* set-returning functions */
+typedef struct {
+   int     cur;        /* index of the next element of list to return */
+   LexDescr    *list;  /* token type descriptions; an element with lexid 0 ends the list */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type* functions.
+ *
+ * Looks up the parser "prsid", calls its lextype function to obtain the list
+ * of token type descriptions and stores that list, together with the tuple
+ * slot and attribute metadata of "tokentype", in funcctx.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   TypeStorage     *st;
+   /* the parser is looked up before the memory context is switched */
+   WParserInfo *prs = findprs(prsid); 
+
+   /* everything allocated from here on is done in the multi-call context */
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->cur=0;
+   /* ask the parser's lextype function for its token type list */
+   st->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
+   );
+   funcctx->user_fctx = (void*)st;
+   /* result rows have the row type of "tokentype" */
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call work shared by the token_type* functions.
+ *
+ * Returns the next token type description as a tuple datum and frees the
+ * alias and description strings of that element.  When the list is
+ * exhausted (missing list or an element with lexid 0) the stored state is
+ * freed and (Datum)0 is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage     *st = (TypeStorage*)funcctx->user_fctx;
+   LexDescr        *item;
+   Datum           result;
+   char            *values[3];
+   char            txtid[16];
+   HeapTuple       tuple;
+
+   /* nothing left: release the state and signal the end */
+   if ( !st->list || st->list[st->cur].lexid == 0 ) {
+       if ( st->list )
+           pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   item = &(st->list[st->cur]);
+
+   /* columns: id as text, alias, description */
+   sprintf(txtid,"%d",item->lexid);
+   values[0]=txtid;
+   values[1]=item->alias;
+   values[2]=item->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   /* the strings are not needed once the tuple has been built */
+   pfree(item->alias);
+   pfree(item->descr);
+   st->cur++;
+
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: lists the token types of the parser whose oid is
+ * given as argument 0, one row per call.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: like token_type, but the parser is given by name
+ * (argument 0) and resolved with name2id_prs().
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( prsname ) );
+       PG_FREE_IF_COPY(prsname,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: like token_type, but for the current parser.  If
+ * no parser has been selected yet, the parser named "default" becomes the
+ * current one.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+
+       if ( current_parser_id==InvalidOid )
+           current_parser_id = name2id_prs( char2text("default") );
+
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+
+/*
+ * Make the parser with the oid given as argument 0 the current parser.  The
+ * parser is looked up first, so the current parser only changes when that
+ * lookup returns.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid prsid = PG_GETARG_OID(0);
+
+   findprs(prsid);
+   current_parser_id = prsid;
+
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Make the parser with the name given as argument 0 the current parser: the
+ * name is resolved with name2id_prs() and the work is left to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+   text *prsname = PG_GETARG_TEXT_P(0);
+   Oid  prsid = name2id_prs(prsname);
+
+   DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+
+   PG_FREE_IF_COPY(prsname, 0);
+   PG_RETURN_VOID();
+}
+
+/* one token produced by a parser */
+typedef struct {
+   int type;           /* token type number returned by the parser */
+   char    *lexem;     /* NUL-terminated copy of the token text */
+} LexemEntry;
+
+/* state kept between the calls of the parse* set-returning functions */
+typedef struct {
+   int cur;            /* index of the next entry of list to return */
+   int len;            /* number of entries in list once parsing is finished */
+   LexemEntry  *list;  /* the tokens found */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse* functions.
+ *
+ * Runs the parser "prsid" over the whole text "txt" and stores every token
+ * (type number and a NUL-terminated copy of its text) in a PrsStorage kept
+ * in funcctx, together with the tuple slot and attribute metadata of
+ * "tokenout".
+ *
+ * The parser id is declared as Oid: it is an oid at every call site and is
+ * handed to findprs(), which takes an Oid; the sibling setup_firstcall()
+ * declares it the same way.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   /* everything allocated from here on is done in the multi-call context */
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text without its varlena header */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* collect tokens until the parser returns type 0 */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* double the array when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* the parser reports pointer and length; keep a terminated copy */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* len becomes the number of tokens found, cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   /* result rows have the row type of "tokenout" */
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call work shared by the parse* functions.
+ *
+ * Returns the next stored token as a tuple datum and frees its text.  When
+ * all tokens have been returned the stored state is freed and (Datum)0 is
+ * returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *st = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   Datum       result;
+   char        *values[2];
+   char        tid[16];
+   HeapTuple   tuple;
+
+   /* nothing left: release the state and signal the end */
+   if ( st->cur >= st->len ) {
+       if ( st->list )
+           pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = &(st->list[st->cur]);
+
+   /* columns: token type as text, token text */
+   sprintf(tid,"%d",entry->type);
+   values[0]=tid;
+   values[1]=entry->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   /* the text is not needed once the tuple has been built */
+   pfree(entry->lexem);
+   st->cur++;
+
+   return result;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: parses the text in argument 1 with the parser
+ * whose oid is argument 0 and returns one token per call.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),input );
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: like parse, but the parser is given by name
+ * (argument 0) and resolved with name2id_prs(); the text is argument 1.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0);
+       text *input = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( prsname ),input );
+       PG_FREE_IF_COPY(prsname,0);
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: like parse, but uses the current parser on the
+ * text in argument 0.  If no parser has been selected yet, the parser named
+ * "default" becomes the current one.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+
+       if ( current_parser_id==InvalidOid )
+           current_parser_id = name2id_prs( char2text("default") );
+
+       prs_setup_firstcall(funcctx, current_parser_id,input );
+       PG_FREE_IF_COPY(input,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * Build a headline for a text.
+ *
+ * Arguments: 0 - oid of the configuration, 1 - the text, 2 - the query,
+ * 3 (optional) - an options text.  The text is parsed with hlparsetext(),
+ * the headline function of the configuration's parser is then called on the
+ * parsed words, and genhl() produces the resulting text.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   /* options are used only if a fourth argument was passed and its pointer is not NULL */
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   /* start with room for 32 words */
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   /* call the parser's headline function on the parsed words */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /* NOTE(review): presumably startsel and stopsel are allocated by the
+    * headline function called above -- confirm that they are never NULL here */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+
+/*
+ * Like headline, but the configuration is given by name (argument 0) and
+ * resolved with name2id_cfg().  The optional options argument (argument 3)
+ * is passed on, or a NULL pointer when it is absent.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text  *cfgname = PG_GETARG_TEXT_P(0);
+   Oid   cfgid = name2id_cfg( cfgname );
+   Datum options = ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL);
+   Datum result;
+
+   result = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(cfgid),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       options
+   );
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(result);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+
+/*
+ * Like headline, but uses the configuration returned by get_currcfg().
+ * Arguments: 0 - the text, 1 - the query, 2 (optional) - the options.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum options = ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL);
+   Datum result;
+
+   result = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       options
+   );
+
+   PG_RETURN_DATUM(result);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/*
+ * Description of one parser, filled by init_prs() from its pg_ts_parser row.
+ */
+typedef struct {
+   Oid prs_id;                 /* oid of the pg_ts_parser row */
+   FmgrInfo start_info;        /* lookup data of the prs_start function */
+   FmgrInfo getlexeme_info;    /* lookup data of the prs_nexttoken function */
+   FmgrInfo end_info;          /* lookup data of the prs_end function */
+   FmgrInfo headline_info;     /* lookup data of the prs_headline function */
+   Oid lextype;                /* oid of the prs_lextype function */
+   void *prs;                  /* value returned by the start function; passed to the other parser functions */
+} WParserInfo;
+
+/* load the description of parser "id" into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached description of parser "id", loading it if needed */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* drop all cached parser descriptions */
+void   reset_prs(void);
+
+
+/*
+ * Description of one token type as returned by a parser's lextype function.
+ * A list of these ends with an element whose lexid is 0.
+ */
+typedef struct {
+   int lexid;          /* token type number */
+   char    *alias;     /* short name */
+   char    *descr;     /* description */
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+/*
+ * prsd_lextype: build the list of lexeme types known to the default parser.
+ * Returns a palloc'd array of LASTNUM+1 LexDescr entries: ids 1..LASTNUM
+ * with copies of their alias and description, followed by a terminating
+ * entry whose lexid is 0 (its alias/descr are left unset).
+ */
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+Datum
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *list=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   LexDescr *slot=list;
+   int id;
+
+   for(id=1;id<=LASTNUM;id++,slot++) {
+       slot->lexid = id;
+       slot->alias = pstrdup(tok_alias[id]);
+       slot->descr = pstrdup(lex_descr[id]);
+   }
+
+   /* terminator */
+   slot->lexid=0;
+
+   PG_RETURN_POINTER(list);
+}
+
+/*
+ * prsd_start: begin parsing a buffer (argument 0) of the given length
+ * (argument 1) with the default parser.  Always returns a NULL pointer.
+ */
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+Datum
+prsd_start(PG_FUNCTION_ARGS) {
+   char    *buf = (char*)PG_GETARG_POINTER(0);
+   int     buflen = PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+   PG_RETURN_POINTER(NULL);
+}
+
+/*
+ * prsd_getlexeme: fetch the next lexeme from the default parser.
+ * Argument 0 (the parser state) is ignored; arguments 1 and 2 are output
+ * pointers that receive the lexeme text and its length, taken from the
+ * 'token' / 'tokenlen' variables after calling tsearch2_yylex().
+ * Returns the lexeme type reported by tsearch2_yylex().
+ */
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+Datum
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tokptr=(char**)PG_GETARG_POINTER(1);
+   int *lenptr=(int*)PG_GETARG_POINTER(2);
+   int toktype;
+
+   toktype = tsearch2_yylex();
+
+   *tokptr = token;
+   *lenptr = tokenlen;
+   PG_RETURN_INT32(toktype);
+}
+
+/*
+ * prsd_end: finish a parse started with prsd_start() by calling
+ * end_parse().  The parser-state argument is not used.
+ */
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse();
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class predicates used by the headline code.  The arguments are
+ * numeric lexeme type ids.
+ * NOTE(review): the ids presumably correspond to the lexeme types declared
+ * in wordparser/deflex.h -- confirm there before changing any number.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* tokens that prsd_headline() marks with the 'replace' flag */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* tokens that prsd_headline() does not count as words */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* tokens on which prsd_headline() prefers not to end a headline fragment */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* window of headline words handed to TS_execute() via checkcondition_HL() */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * TS_execute() callback: reports whether query item 'val' is attached to
+ * any word inside the hlCheck window passed as 'checkval'.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window=(hlCheck*)checkval;
+   int idx=0;
+
+   while ( idx < window->len ) {
+       if ( window->words[idx].item==val )
+           return true;
+       idx++;
+   }
+   return false;
+}
+
+
+/*
+ * hlCover: find the next candidate "cover" -- a span of words [*p, *q]
+ * for which the query evaluates to true.
+ *
+ * On entry *p is the word index to start searching from.  On success the
+ * function returns true with *p / *q set to the first and last word of the
+ * cover; otherwise it returns false.
+ *
+ * The loop bounds (j<query->size, i<prs->curwords) were lost when this
+ * text was extracted from HTML and are restored here.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /*
+    * Pass 1: for every value item of the query find its first occurrence
+    * at or after pos; *q becomes the largest such index.
+    */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   /* *q still 0: nothing found beyond index 0, no (further) cover */
+   if ( *q==0 )
+       return false;
+
+   /*
+    * Pass 2: walking backwards from *q, find for every value item its
+    * nearest occurrence; *p becomes the smallest such index.
+    */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       /* check that the words in [*p, *q] really satisfy the query */
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* not a real cover: retry starting one word further on */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+/*
+ * prsd_headline: choose which words of a parsed text form the headline.
+ *
+ * Arguments: 0 - HLPRSTEXT* with the parsed words (modified in place and
+ * returned), 1 - options text or NULL, 2 - the query.
+ *
+ * Options recognised (case-insensitive keys): MaxWords, MinWords,
+ * ShortWord, StartSel, StopSel.  Errors are raised unless
+ * 0 < MinWords < MaxWords and ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is stretched or shortened towards the
+ * MinWords..MaxWords range and the best one is kept; if there is no cover
+ * at all, the first MinWords words of the text are used.  Words of the
+ * chosen fragment get their in/selected/skip/replace flags set for genhl().
+ *
+ * Three conditions that were truncated by HTML extraction (the
+ * "poslen<bestlen" test and the two "i<prs->curwords && curlen<..." loop
+ * bounds) are restored here.
+ */
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults; overridable from opt, together with the start and stop tags */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this fragment if it is the first one, if it holds more query
+        * words and ends on an acceptable token, or if it ends on an
+        * acceptable token while the current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: fall back to the first min_words words */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen fragment for genhl() */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   if (!prs->startsel)
+       prs->startsel=pstrdup("");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+       int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+       ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+       ArrayType *a;
+
+       if ( !cfg->map ) {
+           cfg->len=lexid+1;
+           cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+           if ( !cfg->map )
+               ts_error(ERROR,"No memory");
+           memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+       }
+
+       if (isnull)
+           continue;
+
+       a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) );
+       
+       if ( ARR_NDIM(a) != 1 )
+           ts_error(ERROR,"Wrong dimension");
+       if ( ARRNELEMS(a) < 1 )
+           continue;
+
+       cfg->map[lexid].len=ARRNELEMS(a);
+       cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+       memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+       ptr=(text*)ARR_DATA_PTR(a);
+       oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+       for(j=0;jmap[lexid].len;j++) {
+           cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+           ptr=NEXTVAL(ptr);
+       } 
+       MemoryContextSwitchTo(oldcontext);
+
+       if ( a != toasted_a ) 
+           pfree(a);
+   }
+   
+   SPI_finish();
+   cfg->prs_id = name2id_prs( prsname );
+   pfree(prsname);
+   for(i=0;ilen;i++) {
+       for(j=0;jmap[i].len;j++) {
+           ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+           cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+           pfree(ptr);
+       }
+   }
+}
+
+/*
+ * Cache of loaded configurations.
+ *   last_cfg    - entry returned by the most recent findcfg() lookup
+ *   len         - number of entries in use in 'list'
+ *   reallen     - number of entries allocated for 'list'
+ *   list        - malloc'd array, kept sorted by id (see findcfg())
+ *   name2id_map - name -> oid map filled by name2id_cfg()
+ */
+typedef struct {
+   TSCfgInfo   *last_cfg;
+   int     len;
+   int     reallen;
+   TSCfgInfo   *list;
+   SNMap       name2id_map;
+} CFGList;
+
+/* the single cache instance, initially empty */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * reset_cfg: drop the whole configuration cache.  Frees the name->id map,
+ * every per-lexeme-type dictionary list, every map array and the list
+ * itself, then zeroes CList.
+ *
+ * The loop bounds (i<CList.len, j<CList.list[i].len) were lost when this
+ * text was extracted from HTML and are restored here.
+ */
+void
+reset_cfg(void) {
+   freeSNMap( &(CList.name2id_map) );
+   if ( CList.list ) {
+       int i,j;
+       for(i=0;i<CList.len;i++)
+           if ( CList.list[i].map ) {
+               for(j=0;j<CList.list[i].len;j++)
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+       free(CList.list);
+   }
+   memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * comparecfg: qsort()/bsearch() comparator ordering TSCfgInfo by id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is
+ * unsigned, so the difference converted to int has the wrong sign whenever
+ * the two ids are more than INT_MAX apart, which would break both the sort
+ * order and the binary search.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * findcfg: return the cached TSCfgInfo for configuration 'id', loading it
+ * with init_cfg() on first use.  The returned pointer refers into
+ * CList.list, which is realloc'd and re-sorted when a new entry is added.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance: load it, growing the array (16, then doubling) if full */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+   CList.last_cfg=&(CList.list[CList.len]);
+   init_cfg(id, CList.last_cfg);
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   /* the new entry may have moved, so look it up again via bsearch */
+   return findcfg(id); /* qsort changed order!! */;
+}
+
+
+/*
+ * name2id_cfg: translate a configuration name into its oid.
+ * The name->id map in CList is consulted first; on a miss the oid is read
+ * from pg_ts_cfg through SPI (using a saved plan) and remembered in the
+ * map.  Raises an error if the SPI calls fail, if no row matches, or if
+ * the oid is NULL.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   Oid id=findSNMap_t( &(CList.name2id_map), name );
+   
+   /* cache hit */
+   if ( id ) 
+       return id;
+   
+   SPI_connect();
+   /* prepare and save the lookup plan on first use */
+   if ( !plan_name2id ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull ) 
+           elog(ERROR, "Null id for tsearch config");
+   } else 
+       elog(ERROR, "No tsearch config");
+   SPI_finish();
+   /* remember the result for later calls */
+   addSNMap_t( &(CList.name2id_map), name, id );
+   return id;
+}
+
+
+/*
+ * parsetext_v2: split 'buf' (buflen bytes) into lexemes with the parser of
+ * configuration 'cfg', normalize each lexeme with the dictionaries mapped
+ * to its type, and append the resulting words to 'prs'.
+ *
+ * For every lexeme the dictionaries are tried in order; the first one that
+ * returns a non-NULL list wins (an empty list is also accepted and adds no
+ * words).  prs->pos is advanced once per recognized lexeme.
+ *
+ * Changes relative to the extracted text: the dictionary loop bound
+ * (i<cfg->map[type].len), lost in HTML extraction, is restored, and the
+ * lexeme length is passed with Int32GetDatum() because it is an integer,
+ * not a pointer.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* a lexeme type of 0 marks the end of input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           /* the words take ownership of the strings; only the array is freed */
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * hladdword: append one raw lexeme to the headline word array.
+ * The array is doubled until there is room, the new slot is zeroed, and
+ * the lexeme type, length and a palloc'd copy of its text are stored.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* take the slot address only after any reallocation */
+   slot = prs->words + prs->curwords;
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+
+   prs->curwords++;
+}
+
+/*
+ * hlfinditem: attach query items to the most recently added headline word.
+ * 'buf'/'buflen' is a normalized form of that word; every value item of the
+ * query with the same text is linked to it.  The first match is stored in
+ * the word itself; each further match appends a copy of the word flagged
+ * 'repeated' that carries the additional item.
+ *
+ * Fixes: (1) the pointer to the last word is taken only AFTER the array
+ * has been grown -- taking it before repalloc() left it dangling whenever
+ * the array moved; (2) the loop bound (i<query->size), lost in HTML
+ * extraction, is restored.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* reserve room for the worst case: one extra copy per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* safe now: no reallocation happens below */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * hlparsetext: parse 'buf' for headline generation.  Every lexeme is
+ * stored verbatim with hladdword(); it is then normalized with the
+ * dictionaries mapped to its type, and each normalized form is matched
+ * against the query with hlfinditem().
+ *
+ * Changes relative to the extracted text: the dictionary loop bound
+ * (i<cfg->map[type].len), lost in HTML extraction, is restored, and the
+ * lexeme length is passed with Int32GetDatum() because it is an integer,
+ * not a pointer.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* a lexeme type of 0 marks the end of input */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       /* keep every lexeme, even those of unmapped types */
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           /* the normalized strings are only needed for matching */
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * genhl: build the headline text from the flagged words in 'prs'.
+ * Words with 'in' set (and neither 'skip' nor 'repeated') are emitted:
+ * 'replace' words as a single space, others verbatim, wrapped in
+ * startsel/stopsel when 'selected'.  The text of every non-repeated word
+ * is pfree'd along the way.  Returns a palloc'd text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   text *out;
+   int len=128;
+   char *ptr;
+   HLWORD  *wrd=prs->words;
+
+   out = (text*)palloc( len );
+   ptr=((char*)out) + VARHDRSZ;
+
+   while( wrd - prs->words < prs->curwords ) {
+       /* grow the buffer until the word plus both markers is sure to fit */
+       while (  wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
+           int dist = ptr - ((char*)out);
+           len*= 2;
+           out = (text *) repalloc(out, len);
+           ptr=((char*)out) + dist;
+       }
+
+       if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+           if ( wrd->replace ) {
+               *ptr=' ';
+               ptr++;
+           } else {
+               if (wrd->selected) {
+                   memcpy(ptr,prs->startsel,prs->startsellen);
+                   ptr+=prs->startsellen;
+               }
+               memcpy(ptr,wrd->word,wrd->len);
+               ptr+=wrd->len;
+               if (wrd->selected) {
+                   memcpy(ptr,prs->stopsel,prs->stopsellen);
+                   ptr+=prs->stopsellen;
+               }
+           }
+       }
+
+       /* repeated entries share the word buffer of the entry they copy */
+       if ( !wrd->repeated )
+           pfree(wrd->word);
+
+       wrd++;
+   }
+
+   /* total size, header included */
+   VARATT_SIZEP(out)=ptr - ((char*)out);
+   return out; 
+}
+
+/*
+ * get_currcfg: return the id of the current configuration.
+ * If current_cfg_id is already set it is returned as is; otherwise the
+ * configuration whose 'locale' column equals the process's LC_CTYPE locale
+ * is looked up in pg_ts_cfg through SPI and remembered in current_cfg_id.
+ * Raises an error if the SPI calls fail or no configuration matches.
+ */
+int  
+get_currcfg(void) {
+   Oid arg[1]={ TEXTOID };
+   const char *curlocale;
+   Datum pars[1];
+   bool isnull;
+   int stat;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   /* prepare and save the lookup plan on first use */
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* setlocale() with a NULL locale only queries the current setting */
+   curlocale = setlocale(LC_CTYPE, NULL);
+   pars[0] = PointerGetDatum( char2text((char*)curlocale) );
+   stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1);
+
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 )
+       current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else 
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   pfree(DatumGetPointer(pars[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * set_curcfg: make the configuration with the given oid the current one.
+ * findcfg() is called first so the configuration is loaded into the cache
+ * before current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+   Oid cfgid = PG_GETARG_OID(0);
+
+   findcfg(cfgid);
+   current_cfg_id = cfgid;
+   PG_RETURN_VOID();
+}
+                
+/*
+ * set_curcfg_byname: like set_curcfg(), but the configuration is given by
+ * name; the name is resolved with name2id_cfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+   text *cfgname = PG_GETARG_TEXT_P(0);
+   Oid cfgid = name2id_cfg(cfgname);
+
+   DirectFunctionCall1( set_curcfg, ObjectIdGetDatum(cfgid) );
+
+   PG_FREE_IF_COPY(cfgname, 0);
+   PG_RETURN_VOID();
+}
+
+/* show_curcfg: return the oid of the current configuration (get_currcfg()) */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   PG_RETURN_OID( get_currcfg() ); 
+}
+
+/*
+ * reset_tsearch: SQL-callable cache reset.  The body only calls
+ * ts_error() with a NOTICE.
+ * NOTE(review): presumably ts_error() itself clears the caches before
+ * reporting -- confirm in its definition, which is not visible here.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}


diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1


--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* list of dictionaries (stored as Datums holding oids) for one lexeme type */
+typedef struct {
+   int len;
+   Datum   *dict_id;
+} ListDictionary;
+
+/*
+ * One loaded configuration: its oid, the oid of its parser, and 'map',
+ * an array of 'len' dictionary lists indexed by lexeme type.
+ */
+typedef struct {
+   Oid id;
+   Oid prs_id;
+   int len;
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/*
+ * One normalized word produced by parsetext_v2(): its text and length plus
+ * position information (parsetext_v2() fills pos.pos and sets alen to 0).
+ */
+typedef struct {
+        uint16          len;
+   union {
+       uint16      pos;
+       uint16      *apos;
+   } pos;
+        char       *word;
+   uint32  alen;
+}       WORD;
+   
+/*
+ * Growable array of WORDs: lenwords entries allocated, curwords in use;
+ * 'pos' is the running lexeme position counter.
+ */
+typedef struct {
+        WORD       *words;
+        int4            lenwords;
+        int4            curwords;
+   int4        pos;
+}       PRSTEXT;
+
+/*
+ * One lexeme of a text being prepared for a headline.  The one-bit flags
+ * are set by the headline code and consumed by genhl(); 'type' is the
+ * lexeme type and 'item' the matching query item, if any.
+ */
+typedef struct {
+        uint16    len;
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   uint8   type;
+        char      *word;
+   ITEM      *item;
+}       HLWORD;
+   
+/*
+ * Growable array of HLWORDs plus the strings (and their lengths) that
+ * genhl() puts before and after each selected word.
+ */
+typedef struct {
+        HLWORD       *words;
+        int4            lenwords;
+        int4            curwords;
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif


diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+/*
+ * tsstat_in: input function of the tsstat type.  The input is ignored;
+ * the result is always an empty statistics value (header only).
+ */
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *empty=palloc(STATHDRSIZE);
+
+   empty->size=0;
+   empty->len=STATHDRSIZE;
+   PG_RETURN_POINTER(empty);
+}
+
+/* tsstat_out: output function of the tsstat type; always raises an error */
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * SEI_realloc: grow an array of WordEntry pointers.  A NULL array or a
+ * zero length starts a fresh array of 8 slots; otherwise the capacity is
+ * doubled.  *len is updated to the new capacity and the (possibly moved)
+ * array is returned.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( in!=NULL && *len!=0 ) {
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*)* (*len) );
+   }
+
+   *len=8;
+   return palloc( sizeof(WordEntry*)* (*len) );
+}
+
+/*
+ * compareStatWord: order a statistics entry against a tsvector word.
+ * Shorter strings sort first; strings of equal length are compared with
+ * strncmp() over that length.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(
+       STATSTRPTR(stat) + a->pos,
+       STRPTR(txt) + b->pos,
+       a->len
+   );
+}
+
+/*
+ * formstat: build a new statistics value containing all entries of 'stat'
+ * plus 'len' new words.  'entry' points to the new words inside tsvector
+ * 'txt'; callers are expected to pass them in the order defined by
+ * compareStatWord().  Each new entry starts with ndoc=1 and nentry set to
+ * the word's position count (at least 1).  'stat' itself is not modified.
+ *
+ * The three "ptr-entry<len" loop conditions, lost when this text was
+ * extracted from HTML, are restored here.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* total string space needed by the new words */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings keep their offsets; new strings are appended after them */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* single new word: binary search for its slot, copy both halves */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge the two ordered sequences */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* copy whatever is left of the old entries ... */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* ... or append whatever is left of the new words */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+/*
+ * ts_accum: fold one tsvector (argument 1) into the running statistics
+ * (argument 0).  Words already present get ndoc incremented and nentry
+ * increased by their position count (at least 1); unknown words are
+ * collected and added with formstat().  Returns the updated statistics --
+ * the input value itself when no new words were found, otherwise a newly
+ * allocated value.
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both ordered lists in parallel, or, when the
+    * statistics are at least 100 times larger than the tsvector, binary
+    * search each word of the tsvector in the statistics.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word not in the statistics yet: remember it */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* words sorting after every existing entry are all new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-call state of the set-returning functions below: index of the next
+ * entry to emit and a private copy of the statistics.
+ * NOTE(review): 'stat' is declared tsvector* but is filled from a tsstat
+ * and only accessed through ->size and the STAT* macros -- confirm the two
+ * structs share their leading layout.
+ */
+typedef struct {
+   uint32  cur;
+   tsvector *stat;
+} StatStorage;
+
+/*
+ * ts_setup_firstcall: first-call initialisation shared by ts_accum_finish()
+ * and ts_stat().  Inside the multi-call memory context it copies 'stat'
+ * into a StatStorage saved in funcctx->user_fctx and sets up the slot and
+ * attribute metadata from the tuple descriptor of relation "statinfo".
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   StatStorage     *st;
+   
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   st=palloc( sizeof(StatStorage) );
+   st->cur=0;
+   st->stat=palloc( stat->len );
+   memcpy(st->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+
+/*
+ * ts_process_call: produce the next result row of a statistics scan.
+ * Returns a tuple Datum built from (word, ndoc, nentry) of the current
+ * entry and advances the cursor; once all entries have been emitted it
+ * frees the stored state and returns (Datum)0.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage     *st;
+   st=(StatStorage*)funcctx->user_fctx;
+
+   if ( st->cur < st->stat->size ) {
+       Datum result;
+       char* values[3];
+       char    ndoc[16];
+       char    nentry[16];
+       StatEntry *entry=STATPTR(st->stat) + st->cur;
+       HeapTuple    tuple;
+
+       /* all three columns are passed as C strings */
+       values[1]=ndoc;
+       sprintf(ndoc,"%d",entry->ndoc);
+       values[2]=nentry;
+       sprintf(nentry,"%d",entry->nentry);
+       /* the stored word is not NUL-terminated: make a terminated copy */
+       values[0]=palloc( entry->len+1 );
+       memcpy( values[0], STATSTRPTR(st->stat)+entry->pos, entry->len);
+       (values[0])[entry->len]='\0';
+
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       pfree(values[0]);
+       st->cur++;
+       return result;  
+   } else {
+       pfree(st->stat);
+       pfree(st);
+   }
+   
+   return (Datum)0;
+}
+
+/*
+ * ts_accum_finish: set-returning function that turns the statistics value
+ * passed as argument 0 into rows, one per call, via ts_process_call().
+ */
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* (Datum)0 from ts_process_call() means there are no more rows */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* oid of the tsvector type, looked up lazily by get_ti_Oid() */
+static Oid tiOid=InvalidOid;
+
+/*
+ * get_ti_Oid: look up the oid of type 'tsvector' in pg_type and store it
+ * in tiOid.  Must be called with an SPI connection open.  Raises an error
+ * if the query fails, returns no row, or yields InvalidOid.
+ *
+ * The row-count test is "< 1": SPI_processed is an unsigned count, so the
+ * former "< 0" test could never fire and an empty result would have been
+ * dereferenced through SPI_tuptable->vals[0].
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * ts_stat_sql: run the SQL query given in 'txt' and accumulate statistics
+ * over its result.  The query must return exactly one column of type
+ * tsvector; NULL values are skipped.  Rows are fetched through a cursor in
+ * batches of 100 and folded in with ts_accum().  Must be called with an
+ * SPI connection open.  Returns the accumulated statistics.
+ *
+ * The per-batch loop bound (i<SPI_processed), lost when this text was
+ * extracted from HTML, is restored here.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from empty statistics */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum() may hand back its input; free only replaced values */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+/*
+ * ts_stat: set-returning function taking the text of an SQL query.  On the
+ * first call the query is run through ts_stat_sql() inside an SPI
+ * connection and the statistics are stored for the scan; each call then
+ * returns one row via ts_process_call().
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       /* copies stat into the multi-call context before SPI is closed */
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* (Datum)0 from ts_process_call() means there are no more rows */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One record of the statistics array stored in tsstat.data.
+ * NOTE(review): len/pos presumably describe the word's length and offset
+ * in the string area, ndoc/nentry the counters reported through the SQL
+ * type statinfo(word, ndoc, nentry) - confirm against ts_stat.c.
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Variable-length statistics value: a two-int4 header followed by
+ * `size` StatEntry records and then the string area (see the macros below).
+ */
+typedef struct {
+   int4        len;    /* total length in bytes, header included */
+   int4        size;   /* number of StatEntry records in data */
+   char        data[1];
+}  tsstat;
+
+/* Size of the fixed header (len + size). */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* Total bytes needed for x entries plus lenstr bytes of strings. */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* Start of the StatEntry array inside a tsstat. */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* Start of the string area, located right after the StatEntry array. */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* Number of bytes occupied by the string area. */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+-- Dictionary registry, one row per dictionary:
+--   dict_name       unique dictionary name
+--   dict_init       oid (pg_proc) of the init function; nullable
+--   dict_initoption text option for the dictionary (the rows inserted below
+--                   store a stop-word file path here, or null)
+--   dict_lexize     oid (pg_proc) of the lexize function
+--   dict_comment    free-form description
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+-- Parser registry, one row per parser.  Each prs_* oid column holds the
+-- pg_proc oid of the function implementing that step (see the insert of
+-- the 'default' parser below): start, next token, end, headline
+-- generation and token-type listing.
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+-- Named search configurations: each one names a parser (prs_name) and,
+-- optionally, a locale string.
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+-- Per-configuration mapping from a token alias (tok_alias) to the array
+-- of dictionary names (dict_name) listed for that token type.
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of lexeme positions in that string and their lengths
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending position.
+ * Equal positions compare as 0: qsort() requires a consistent ordering
+ * (cmp(a,b) and cmp(b,a) must not both claim "greater").
+ */
+static int 
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ * When several items share a position the highest weight is kept.
+ * Compaction stops early once MAXNUMPOS items are kept or the position
+ * MAXENTRYPOS-1 has been stored.  Returns the number of items kept.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   int4 last = 0;  /* index of the most recently kept item */
+   int4 i;
+
+   if (l==1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   for (i = 1; i < l; i++) {
+       if ( a[i].pos == a[last].pos ) {
+           /* same position again: only a heavier weight is of interest */
+           if ( a[i].weight > a[last].weight )
+               a[last].weight = a[i].weight;
+           continue;
+       }
+
+       last++;
+       a[last].pos = a[i].pos;
+       a[last].weight = a[i].weight;
+       if ( last >= MAXNUMPOS-1 || a[last].pos == MAXENTRYPOS-1 )
+           break;
+   }
+
+   return last + 1;
+}
+
+/* Text buffer that the entry.pos offsets used by compareentry() index into. */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN: shorter lexemes sort first,
+ * lexemes of equal length are ordered by strncmp() on their bytes
+ * in BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *left = (const WordEntryIN *) a;
+   const WordEntryIN *right = (const WordEntryIN *) b;
+
+   if ( left->entry.len != right->entry.len )
+       return ( left->entry.len > right->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[left->entry.pos],
+                  &BufferStr[right->entry.pos],
+                  left->entry.len);
+}
+
+/*
+ * Sort the l parsed entries in a[] (by length, then bytes in buf) and merge
+ * entries holding the same lexeme, in place.
+ *
+ * Position lists keep their element count in slot 0, read and written
+ * through a uint16 cast; real positions start at pos[1].  Lists of merged
+ * entries are concatenated and then de-duplicated with uniquePos().
+ *
+ * Returns the number of distinct entries.  *outbuflen receives the space
+ * accounted for lexemes (SHORTALIGNed) and positions.
+ *
+ * NOTE(review): with l == 1 and no positions *outbuflen is left untouched,
+ * and the multi-entry path adds to the caller's value instead of starting
+ * from zero (and does not count the leading counter slot of each list).
+ * This looks like it relies on the caller's initial value as slack - confirm.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   res = a;
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
+       }
+       return l;
+   }
+
+   ptr = a + 1;
+   /* compareentry() reads the lexeme bytes through this file-level pointer */
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* new lexeme: finalize the previous entry, then start the next slot */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               *outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* same lexeme again: fold its positions into the kept entry */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* kept entry had no positions: take over the duplicate's list */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+   /* finalize the last kept entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/*
+ * States of the gettoken_tsvector() scanner:
+ *   WAITWORD      skipping blanks, looking for the start of a lexeme
+ *   WAITENDWORD   inside an unquoted lexeme
+ *   WAITNEXTCHAR  just saw a backslash; next character is taken literally
+ *   WAITENDCMPLX  inside a single-quoted lexeme
+ *   WAITPOSINFO   after the closing quote; ':' introduces positions
+ *   INPOSINFO     a digit starting a position number is required
+ *   WAITPOSDELIM  after a position number: ',', a weight mark or the end
+ */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Double the state->word buffer once the write position plus one reaches
+ * state->len, and re-base state->curpos onto the reallocated buffer.
+ * Expects a variable named `state` (pointer) in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Read the next lexeme (and optional position list) from state->prsbuf.
+ *
+ * The lexeme is copied, NUL-terminated, into state->word; state->curpos
+ * points at the terminator.  Lexemes may be bare or single-quoted, and a
+ * backslash makes the following character literal.  Unless
+ * state->oprisdelim is set, a ':' after the lexeme starts a comma
+ * separated list of positions, each optionally followed by a weight mark
+ * (a or *, b, c, d, case-insensitive); they are collected in state->pos,
+ * whose slot 0 holds the count, and state->alen becomes non-zero.
+ *
+ * Returns 1 when a lexeme was read and 0 at end of input; malformed input
+ * is reported with elog(ERROR).
+ *
+ * The <ctype.h> classifiers are always called on an unsigned char value:
+ * passing a negative plain char (any byte >= 0x80 where char is signed)
+ * is undefined behaviour.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               /* escaped character: store it verbatim, then resume the previous state */
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               /* delimiter is left unconsumed for the next call */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1;
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* step over the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   /* first position: allocate the list; slot 0 is the counter */
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2;
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi() reads the whole number; its remaining digits are skipped in WAITPOSDELIM */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * tsvector_in - input function of the tsvector type.
+ *
+ * Tokenizes the cstring argument with gettoken_tsvector(), gathering the
+ * lexeme bytes in tmpbuf and one WordEntryIN per token in arr; both grow
+ * by doubling.  uniqueentry() then sorts and merges the entries, and the
+ * result is packed into a freshly palloc'd, zero-filled tsvector: the
+ * WordEntry array first, then for each entry its lexeme (SHORTALIGNed)
+ * followed by its position list when it has one.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* make room for one more entry */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       /* make room for the lexeme bytes, keeping cur valid across repalloc */
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       if (cur - tmpbuf > MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       /* state.alen is non-zero only when the token carried positions */
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   /* buflen is reused here as the data-area size handed to CALCDATASIZE */
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       /* entry.pos now becomes the offset inside the result's string area */
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* copy counter slot plus positions (count + 1 elements) */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * tsvector_length - SQL length(tsvector): the value of the size field
+ * of the (detoasted) argument.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec;
+   int4        nentries;
+
+   vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   nentries = vec->size;
+
+   /* release the detoasted copy, if one was made, before returning */
+   PG_FREE_IF_COPY(vec, 0);
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * tsvector_out - build the text representation of a tsvector.
+ *
+ * Every lexeme is written between single quotes and entries are separated
+ * by one space.  A single quote inside a lexeme is preceded by a backslash.
+ * If an entry carries position data it is followed by ':' and a
+ * comma-separated list of positions, each optionally followed by a weight
+ * letter (weight 3 -> 'A', 2 -> 'B', 1 -> 'C', 0 -> no letter).
+ *
+ * Returns a palloc'd, NUL-terminated string.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   /*
+    * First pass: compute an upper bound for the output size.  Each lexeme
+    * byte is counted twice (room for an escape) and each position gets 7
+    * bytes: a 14-bit position prints as at most 5 digits, plus a weight
+    * letter and a comma.
+    */
+       lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+       for (i = 0; i < out->size; i++) {
+               lenbuf += ptr[i].len*2 /*for escape */;
+               if ( ptr[i].haspos )
+                       lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+       }
+
+   /* second pass: emit the entries; ptr is advanced once per entry */
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'')
+           {
+               /* grow by one byte for the backslash; repalloc may move the buffer */
+               int4        pos = curout - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               /* print the position, then step curout to the new end of string */
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               /* no comma after the last position */
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   /* also terminate at the very end of the allocated buffer */
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort comparator for WORD entries.
+ *
+ * Orders by lexeme length first, then by lexeme bytes, and finally by
+ * position, so that equal lexemes become adjacent and appear in ascending
+ * position order.
+ *
+ * Returns 0 when lexeme and position are both equal.  qsort() requires a
+ * consistent ordering; answering "less than" for both (a,b) and (b,a)
+ * is undefined behaviour.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int         res;
+
+   if (wa->len != wb->len)
+       return (wa->len > wb->len) ? 1 : -1;
+
+   /* lengths are equal here, so either length bounds the comparison */
+   res = strncmp(wa->word, wb->word, wb->len);
+   if (res != 0)
+       return res;
+
+   if (wa->pos.pos == wb->pos.pos)
+       return 0;
+   return (wa->pos.pos > wb->pos.pos) ? 1 : -1;
+}
+
+/*
+ * uniqueWORD - sort the array of parsed words and collapse duplicates.
+ *
+ * a: array of l WORD entries, modified in place.
+ *
+ * For every surviving entry the single position (pos.pos) is replaced by a
+ * palloc'd uint16 array (pos.apos) in which element 0 is the number of
+ * positions and elements 1..n are the positions, each clamped by LIMITPOS.
+ * The word string of each dropped duplicate is pfree'd.
+ *
+ * Returns the number of distinct entries left at the front of the array.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   /* a single word needs no sorting, only the conversion to array form */
+   if (l == 1) {
+       /*
+        * pos.pos is saved before pos.apos is assigned; presumably the two
+        * share storage -- TODO confirm against the WORD declaration.
+        */
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   /* res is the last entry kept so far, ptr the next candidate */
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* a new lexeme: move it into the next free slot */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* same lexeme as res: keep only its position */
+           pfree(ptr->word);
+           /*
+            * Stop collecting once MAXNUMPOS-1 positions are stored or the
+            * last stored position is already the clamped maximum.
+            */
+           if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               /* double the array when the next slot would not fit */
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
+               res->pos.apos[0]++; 
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * Sorts and de-duplicates prs->words (uniqueWORD), then packs the result
+ * into a freshly palloc'd, zero-filled tsvector: the WordEntry array first,
+ * followed by the lexeme bytes and, for entries with positions, a uint16
+ * count plus the WordEntryPos items (all with weight 0).
+ *
+ * The lexeme strings, the position arrays and prs->words itself are freed.
+ *
+ * Raises ERROR if a lexeme would start at an offset that does not fit into
+ * the 20-bit WordEntry.pos field.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+   /* first pass: size of the lexeme/position area */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   /* second pass: fill the entry array and copy lexemes and positions */
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+
+       /*
+        * pos is a 20-bit field, so the largest offset it can hold is
+        * MAXSTRPOS-1; an offset of exactly MAXSTRPOS would silently wrap
+        * to 0, hence ">=" rather than ">".
+        */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+
+           ptr->haspos=1;
+           /* apos[0] is the count, apos[1..] are the positions */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text) - parse a text value with the given
+ * configuration and return the resulting tsvector.  Text that yields no
+ * words produces an empty tsvector (size 0).
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *src = PG_GETARG_TEXT_P(1);
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     prs;
+   tsvector   *result;
+
+   /* start with room for 32 words */
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(src), VARSIZE(src) - VARHDRSZ);
+   PG_FREE_IF_COPY(src, 1);
+
+   if (prs.curwords == 0)
+   {
+       /* nothing was found: hand back a header-only value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   }
+   else
+       result = makevalue(&prs);
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg_name, text) - resolve the configuration name to its id
+ * and delegate to to_tsvector.
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       vector;
+
+   vector = DirectFunctionCall3( to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0 );
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(vector);
+}
+
+/*
+ * to_tsvector(text) - delegate to to_tsvector using the id returned by
+ * get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+
+   PG_RETURN_DATUM( DirectFunctionCall3(
+       to_tsvector,
+       cfgid,
+       PG_GETARG_DATUM(0),
+       (Datum)0
+   ) );
+}
+
+/*
+ * findFunc - look up a one-argument function by name.
+ *
+ * Returns the oid of the first candidate whose argument type is text, or
+ * InvalidOid if there is none.  The whole candidate list is freed.
+ */
+static Oid
+findFunc(char *fname) {
+   List               *qualname = makeList1(makeString(fname));
+   FuncCandidateList   cand = FuncnameGetCandidates(qualname, 1);
+   Oid                 found = InvalidOid;
+
+   freeList(qualname);
+
+   while ( cand != NULL ) {
+       FuncCandidateList   next = cand->next;
+
+       /* remember only the first match, but keep walking to free the rest */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+
+       pfree(cand);
+       cand = next;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * tsearch2(tsvector_field, text_field1, ...) - row-level BEFORE INSERT or
+ * UPDATE trigger that parses the listed character columns with the current
+ * configuration and stores the resulting tsvector in tsvector_field.
+ *
+ * An argument that is not a column name is looked up as a function
+ * (findFunc); once found, that function is applied to the value of every
+ * column processed after it.
+ *
+ * Returns the modified tuple; raises ERROR on misuse or if the tuple cannot
+ * be modified.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* first argument names the column that receives the tsvector */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the value through the user function before parsing it */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /* no detoasted copy was made: make the pfree test below skip txt */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only if detoasting produced a separate copy */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store an empty (header-only) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One lexeme of a tsvector: length of the lexeme and its offset inside the
+ * string area, plus a flag telling whether position data follows it.
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/* One occurrence of a lexeme: 2-bit weight and 14-bit position. */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+/* clamp a position to the largest value the 14-bit field can hold */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * On-disk/in-memory layout: header (len, size), then 'size' WordEntry
+ * items, then the string area holding lexemes and position data.
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Accessor macros.  Every macro parameter is parenthesized so that
+ * expression arguments (e.g. "cur-data", "ptr+1") expand as intended.
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* start of the position block of entry e: a uint16 count, then the positions */
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/* WordEntry plus a pointer to its separately held position data. */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/* state passed to gettoken_tsvector() */
+typedef struct
+{
+   char       *prsbuf;
+   char       *word;
+   char       *curpos;
+   int4        len;
+   int4        state;
+   int4        alen;
+   WordEntryPos    *pos;
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector) - return a copy of the tsvector without position data.
+ *
+ * The result holds the same lexemes in the same order; every entry has
+ * haspos = 0 and the string area contains only the lexeme bytes.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* size of the string area once positions are dropped */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       /* offsets are relative to the start of the output string area */
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char") - return a copy of the tsvector in which
+ * every position carries the given weight.
+ *
+ * The weight letter is case-insensitive: 'a' -> 3, 'b' -> 2, 'c' -> 1,
+ * 'd' -> 0.  Any other character raises ERROR.  Entries without position
+ * data are left untouched.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /*
+    * tolower() is only defined for EOF and values representable as
+    * unsigned char; a plain char may be negative, so convert first.
+    */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   /* work on a private copy so the input value is never modified */
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two lexemes that may live in different tsvectors.
+ * ptra/ptrb are the string areas that a->pos / b->pos are relative to.
+ * Shorter lexemes sort first; equal-length lexemes are compared bytewise.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * add_pos - append the positions of srcptr (an entry of src) to the
+ * position list of destptr (an entry of dest), shifting each by maxpos.
+ *
+ * If destptr has no positions yet, its count is initialised to 0 first.
+ * Copying stops when the destination holds MAXNUMPOS positions or when its
+ * last position has already reached the clamped maximum (MAXENTRYPOS-1).
+ *
+ * Sets destptr->haspos if anything was added and returns the number of
+ * positions added.  The caller must have reserved room in dest.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos )
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ) ;i++) {
+       dpos[ *clen ].weight = spos[i].weight;
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1;
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector) - merge two tsvectors into one.
+ *
+ * Both entry arrays are walked in parallel in compareEntry order (this
+ * assumes each input is already sorted that way -- TODO confirm).  A lexeme
+ * present in both inputs is emitted once.  Positions taken from the second
+ * argument are shifted by the largest position found in the first one.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* allocate for the worst case (no common lexemes); trimmed at the end */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* in1 positions are copied unchanged, count included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* in2 positions go through add_pos so they are shifted by maxpos */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: emit once, combine positions */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* count field already written above, so only the items are added */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* leftover entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* leftover entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Fix up the header with the real entry count.  STRPTR depends on
+    * out->size, so if fewer entries were written than reserved the string
+    * area has to be moved down to follow the shorter entry array.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- WARNING: this script is destructive.
+-- It drops all indices, triggers and columns that use the types defined
+-- in tsearch2.sql.
+
+
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable descriptions of the token types.  Presumably indexed by
+ * the token type codes from deflex.h (1 = LATWORD ... 23 = HTMLENTITY);
+ * slot 0 is an empty placeholder.  Keep in step with tok_alias[].
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short aliases of the token types, in the same order as lex_descr[].
+ * Presumably indexed by the token type codes from deflex.h; slot 0 is an
+ * empty placeholder.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/* remember: LASTNUM must stay equal to the highest token type code below */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+#include <stdio.h>         /* FILE, needed by start_parse_fh() */
+
+/*
+ * Text of the most recent token.  The variable is defined (with an
+ * initializer) in parser.l, so the header only declares it; a second
+ * definition here breaks linking on toolchains that do not merge common
+ * symbols.
+ */
+extern char   *token;
+
+/*
+ * Length of the most recent token.
+ * NOTE(review): this is still a tentative definition.  No other definition
+ * is visible, so it cannot be turned into "extern" without also adding one
+ * to parser.l.
+ */
+int            tokenlen;
+
+/* flex-generated scanner entry point; returns a token type from deflex.h */
+int            tsearch2_yylex(void);
+
+/* start scanning a memory buffer of the given length */
+void       start_parse_str(char *, int);
+/* start scanning a file handle; a limit of 0 means "read without limit" */
+void       start_parse_fh(FILE *, int);
+/* release the scanner state created by start_parse_*() */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/* Avoid exit() on fatal scanner errors */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* postgres allocation function */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to token */
+char *s     = NULL;  /* to return WHOLE hyphenated-word */
+
+YY_BUFFER_STATE buf = NULL; /* buffer to parse; it need for parse from string */
+
+int lrlimit = -1;  /* for limiting read from filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* for limiting read from filehandle */
+
+/*
+ * Redefine flex's input macro so that reads from a file handle can be
+ * limited to lrlimit bytes.
+ *
+ * Interactive buffers are read one character at a time up to and including
+ * a newline.  Otherwise: lrlimit == 0 reports end of input; lrlimit > 0
+ * reads at most lrlimit bytes and decrements the remaining budget; a
+ * negative lrlimit reads max_size bytes without any limit.
+ *
+ * NOTE: despite its indentation, the fread() test is NOT part of the
+ * "else bytestoread = max_size;" branch -- it runs for both the limited
+ * and the unlimited case.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; /* skip everything up to the closing script tag */ }
+
+<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
+   /* the whole script element is reported as a single blank */
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+"<!--" { BEGIN INCOMMENT; /* skip everything up to the end of the comment */ }
+
+<INCOMMENT>"-->"   { 
+   /* the whole comment is reported as a single blank */
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"</"[[:alpha:]]    { BEGIN INTAG; }
+
+<INTAG>"\""    { BEGIN QINTAG; /* quoted attribute value: '>' does not end the tag here */ }
+
+<QINTAG>"\\\"" ;
+
+<QINTAG>"\""   { BEGIN INTAG; }
+
+<INTAG>">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+<URL,INITIAL>{HOSTNAME}[/:]{URI} { 
+   /*
+    * Report the whole URL first (from a private copy), then push the text
+    * back and rescan it in SERVER state to emit host and URI separately.
+    */
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+<SERVER,URL,INITIAL>{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+<SERVER>[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+<DELIM>[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+<DELIM>\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+<DELIM>{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+<DELIM>[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+<DELIM>{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+<DELIM>-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+<DELIM,SERVER,URL>.|\n /* return in basic state */ {
+   /* anything else ends the special state; rescan the character in INITIAL */
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/* release the saved whole-token copy and the current scan buffer */
+void end_parse() {
+   if ( s != NULL ) {
+       free( s );
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/* begin scanning 'limit' bytes of the in-memory buffer 'str' */
+void start_parse_str(char* str, int limit) {
+   /* drop whatever an unfinished previous parse left behind */
+   if ( buf != NULL )
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+/* begin scanning a file handle; a limit of 0 means "read without limit" */
+void start_parse_fh( FILE* fh, int limit ) {
+   /* drop whatever an unfinished previous parse left behind */
+   if ( buf != NULL )
+       end_parse();
+
+   if ( limit )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Return column "col" (1-based) of the first row of the current SPI result
+ * as an Oid; report an error instead of silently using a NULL value.
+ */
+static Oid
+prs_oid_column(int col) {
+   bool isnull;
+   Datum val = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, col, &isnull);
+
+   if ( isnull )
+       ts_error(ERROR, "Null value in column %d of pg_ts_parser", col);
+   return DatumGetObjectId(val);
+}
+
+/*
+ * Fill *prs with the description of parser "id" read from pg_ts_parser.
+ *
+ * The structure is zeroed first.  The start, nexttoken, end and headline
+ * functions are resolved with fmgr_info_cxt() in TopMemoryContext; the
+ * lextype function is only stored by Oid.  prs_id is set last, so it stays 0
+ * until everything else has been loaded.  An error is reported when no row
+ * with that oid exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       /* the plan is prepared once and saved for later calls */
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       fmgr_info_cxt(prs_oid_column(1), &(prs->start_info), TopMemoryContext);
+       fmgr_info_cxt(prs_oid_column(2), &(prs->getlexeme_info), TopMemoryContext);
+       fmgr_info_cxt(prs_oid_column(3), &(prs->end_info), TopMemoryContext);
+       prs->lextype=prs_oid_column(4);
+       fmgr_info_cxt(prs_oid_column(5), &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       ts_error(ERROR, "No parser with id %u", id); /* Oid is unsigned */
+   SPI_finish();
+}
+
+typedef struct { /* cache of the parser descriptors loaded so far */
+   WParserInfo *last_prs; /* result of the most recent lookup in findprs(), or NULL */
+   int     len; /* number of entries of list in use */
+   int     reallen; /* number of entries allocated for list */
+   WParserInfo *list; /* array sorted by prs_id (see compareprs) so it can be bsearch'ed */
+   SNMap       name2id_map; /* parser name -> parser Oid, filled by name2id_prs() */
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget everything cached about parsers: free the name -> id map and the
+ * descriptor array, then zero the whole cache structure.
+ */
+void    
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL )
+       free(PList.list);
+
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort/bsearch comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly: Oid is unsigned, so returning the
+ * difference converted to int yields the wrong sign once two ids are more
+ * than INT_MAX apart.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   return ( ida > idb ) ? 1 : 0;
+}
+
+/*
+ * Return the cached descriptor of parser "id", loading it with init_prs()
+ * on first use.
+ *
+ * The cache array is kept sorted by prs_id.  Adding a parser may move the
+ * array (realloc) and re-sorts it, so a pointer obtained from an earlier
+ * call may not refer to the same parser after a new one has been loaded.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo key;
+
+   /* last used prs */
+   if ( PList.last_prs && PList.last_prs->prs_id==id )
+       return PList.last_prs;
+
+   /* already used prs */
+   key.prs_id=id;
+   if ( PList.len != 0 ) {
+       PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* last chance: load it; PList.last_prs is NULL from here on */
+   if ( PList.len==PList.reallen ) {
+       WParserInfo *tmp;
+       int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+       tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       PList.reallen=reallen;
+       PList.list=tmp;
+   }
+
+   /*
+    * Fill the free slot, but make it visible (len, last_prs) only after
+    * init_prs() has returned, so that a failed load does not leave a
+    * half-initialised entry reachable through the cache.
+    */
+   init_prs(id, &(PList.list[PList.len]));
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+
+   /* qsort changed order, so look the new entry up again */
+   PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return PList.last_prs;
+}
+
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into its Oid.
+ *
+ * The name -> id map of the parser cache is consulted first; on a miss the
+ * oid is read from pg_ts_parser through SPI and added to the map.  An error
+ * is reported when no parser has that name.
+ */
+Oid
+name2id_prs(text *name) {
+   Oid arg[1]={ TEXTOID };
+   bool isnull;
+   Datum pars[1]={ PointerGetDatum(name) };
+   int stat;
+   Oid id=findSNMap_t( &(PList.name2id_map), name );
+   
+   if ( id ) 
+       return id;
+   
+
+   SPI_connect();
+   if ( !plan_name2id ) {
+       /* the plan is prepared once and saved for later calls */
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, arg ) );
+       if ( !plan_name2id ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_name2id, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       /* same guard as name2id_cfg(): never cache an id taken from a NULL */
+       if ( isnull )
+           ts_error(ERROR, "Null id for tsearch parser");
+   } else 
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   SPI_finish();
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+typedef struct { /* per-query state of the token_type*() set-returning functions */
+   int     cur; /* index into list of the next entry to return */
+   LexDescr    *list; /* array returned by the parser's lextype function; an entry with lexid 0 ends it */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type*() functions: ask the lextype
+ * function of parser "prsid" for its list of token types and store it,
+ * together with the tuple machinery for relation "tokentype", in funcctx.
+ * Everything is allocated in the multi-call memory context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo *parser = findprs(prsid); 
+   MemoryContext     saved;
+   TypeStorage     *state;
+   TupleDesc            desc;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   state->cur = 0;
+   state->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) )
+   );
+   funcctx->user_fctx = (void*)state;
+
+   desc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+/*
+ * Produce the next (id, alias, description) row for the token_type*()
+ * functions, freeing the alias and description strings of the entry just
+ * emitted.  When the list is exhausted the state is freed and (Datum)0 is
+ * returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage     *state = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char* fields[3];
+   char    idbuf[16];
+   HeapTuple    tuple;
+   Datum row;
+
+   if ( state->list == NULL || state->list[state->cur].lexid == 0 ) {
+       /* nothing left: release the per-query state */
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+   sprintf(idbuf, "%d", entry->lexid);
+   fields[0] = idbuf;
+   fields[1] = entry->alias;
+   fields[2] = entry->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   row = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(fields[1]);
+   pfree(fields[2]);
+   state->cur++;
+   return row;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: one row per token type of the parser whose Oid is
+ * argument 0.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) { 
+       ctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(ctx, PG_GETARG_OID(0) );
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = process_call(ctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: one row per token type of the parser whose name
+ * is argument 0.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0); 
+
+       ctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(ctx, name2id_prs( prsname ) );
+       PG_FREE_IF_COPY(prsname,0);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = process_call(ctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: one row per token type of the current parser.
+ * If no current parser has been chosen yet, the one named "default" becomes
+ * current.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       ctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(ctx, current_parser_id );
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = process_call(ctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+/*
+ * Make the parser whose Oid is argument 0 the current parser.  The parser
+ * is looked up (and loaded into the cache) before the id is recorded.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        Oid prsid = PG_GETARG_OID(0);
+
+        findprs(prsid);
+        current_parser_id = prsid;
+        PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+/*
+ * Make the parser whose name is argument 0 the current parser; the name is
+ * resolved to an Oid and the work is delegated to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *prsname = PG_GETARG_TEXT_P(0);
+        Oid prsid = name2id_prs(prsname);
+
+        DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+        PG_FREE_IF_COPY(prsname, 0);
+        PG_RETURN_VOID();
+}
+
+typedef struct { /* one token produced by a parser */
+   int type; /* token type id returned by the parser's getlexeme function */
+   char    *lexem; /* NUL-terminated copy of the token text */
+} LexemEntry;
+
+typedef struct { /* per-query state of the parse*() set-returning functions */
+   int cur; /* index of the next entry of list to return */
+   int len; /* allocated size while collecting; number of collected entries afterwards */
+   LexemEntry  *list; /* collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse*() functions: run parser "prsid"
+ * over the whole of "txt", collect every (type, lexeme) pair it returns and
+ * store the list, together with the tuple machinery for relation
+ * "tokenout", in funcctx.  Everything is allocated in the multi-call memory
+ * context.
+ *
+ * prsid is declared as Oid, like in setup_firstcall() and findprs(); every
+ * caller passes an Oid.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text payload (varlena header excluded) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* a token type of 0 ends the token stream */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* lex is copied as llen bytes and NUL-terminated here */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from now on len is the number of tokens and cur the read position */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Produce the next (type, lexeme) row for the parse*() functions, freeing
+ * the lexeme string of the entry just emitted.  When all collected tokens
+ * have been returned the state is freed and (Datum)0 is returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *state = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char* fields[2];
+   char    typebuf[16];
+   HeapTuple    tuple;
+   Datum row;
+
+   if ( state->cur >= state->len ) {
+       /* nothing left: release the per-query state */
+       if ( state->list )
+           pfree(state->list);
+       pfree(state);
+       return (Datum)0;
+   }
+
+   entry = &(state->list[state->cur]);
+   sprintf(typebuf, "%d", entry->type);
+   fields[0] = typebuf;
+   fields[1] = entry->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+   row = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(fields[1]);
+   state->cur++;
+   return row;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: tokenize the text in argument 1 with the parser
+ * whose Oid is argument 0, one row per token.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(1); 
+
+       ctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(ctx, PG_GETARG_OID(0), input );
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(ctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: tokenize the text in argument 1 with the parser
+ * whose name is argument 0, one row per token.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *prsname = PG_GETARG_TEXT_P(0); 
+       text *input = PG_GETARG_TEXT_P(1); 
+
+       ctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(ctx, name2id_prs( prsname ), input );
+       PG_FREE_IF_COPY(prsname,0);
+       PG_FREE_IF_COPY(input,1);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(ctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: tokenize the text in argument 0 with the current
+ * parser, one row per token.  If no current parser has been chosen yet, the
+ * one named "default" becomes current.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *ctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *input = PG_GETARG_TEXT_P(0); 
+
+       ctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       prs_setup_firstcall(ctx, current_parser_id, input );
+       PG_FREE_IF_COPY(input,0);
+   }
+
+   ctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(ctx);
+
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(ctx);
+   SRF_RETURN_NEXT(ctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * Build a headline for a document.
+ *
+ * Arguments: 0 - Oid of the tsearch configuration, 1 - document text,
+ * 2 - query, 3 - optional options text (absent or a NULL pointer means no
+ * options).
+ *
+ * The document is split into words with hlparsetext(), the headline
+ * function of the configuration's parser marks the words to show, and
+ * genhl() assembles the resulting text, which is returned.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /*
+    * The selection markers are allocated by the parser's headline function;
+    * prs was zeroed above, so they are still NULL if that function did not
+    * set them.
+    */
+   if ( prs.startsel )
+       pfree(prs.startsel);
+   if ( prs.stopsel )
+       pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+/*
+ * Same as headline(), but argument 0 is the name of the configuration
+ * instead of its Oid.  A missing fourth argument is passed on as a NULL
+ * pointer.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfgname = PG_GETARG_TEXT_P(0);
+   Datum optarg = ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL);
+   Datum out;
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfgname ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       optarg
+   );
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(out);   
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+/*
+ * Same as headline(), using the configuration reported by get_currcfg();
+ * the arguments are document text, query and optional options text.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum optarg = ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL);
+   Datum out;
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       optarg
+   );
+   PG_RETURN_DATUM(out);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+typedef struct { /* description of one parser, loaded from pg_ts_parser by init_prs() */
+   Oid prs_id; /* oid of the pg_ts_parser row; sort key of the parser cache */
+   FmgrInfo start_info; /* prs_start function: called with (text pointer, length) */
+   FmgrInfo getlexeme_info; /* prs_nexttoken function: returns the token type, 0 at the end */
+   FmgrInfo end_info; /* prs_end function */
+   FmgrInfo headline_info; /* prs_headline function */
+   Oid lextype; /* oid of the prs_lextype function, called by oid */
+   void *prs; /* value returned by the start function, handed back to the other calls */
+} WParserInfo;
+
+void init_prs(Oid id, WParserInfo *prs); /* fill *prs from pg_ts_parser */
+WParserInfo* findprs(Oid id); /* cached lookup by oid, loads on first use */
+Oid name2id_prs(text *name); /* parser name -> oid */
+void   reset_prs(void); /* drop the whole parser cache */
+
+
+typedef struct { /* one token type as reported by a parser's lextype function */
+   int lexid; /* token type id; 0 marks the end of an array of LexDescr */
+   char    *alias; /* short name of the token type */
+   char    *descr; /* description of the token type */
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * Return a palloc'ed array describing the token types 1..LASTNUM of the
+ * default parser (id, alias from tok_alias[], description from
+ * lex_descr[]), terminated by an entry whose lexid is 0.
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *list = (LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   LexDescr *slot = list;
+   int id;
+
+   for ( id=1; id<=LASTNUM; id++, slot++ ) {
+       slot->lexid = id;
+       slot->alias = pstrdup(tok_alias[id]);
+       slot->descr = pstrdup(lex_descr[id]);
+   }
+
+   /* slot now points at the last array element: the terminator */
+   slot->lexid = 0;
+
+   PG_RETURN_POINTER(list);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+/*
+ * Start function of the default parser: begin scanning the buffer given by
+ * argument 0 (pointer) and argument 1 (length).  Returns a NULL pointer.
+ */
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char *str = (char*)PG_GETARG_POINTER(0);
+   int len = PG_GETARG_INT32(1);
+
+   start_parse_str( str, len );
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+/*
+ * Next-token function of the default parser: run the scanner once, store
+ * the token pointer and length through arguments 1 and 2 and return the
+ * token type.  Argument 0 is not used.
+ */
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tokout = (char**)PG_GETARG_POINTER(1); 
+   int *lenout = (int*)PG_GETARG_POINTER(2);
+   int  toktype;
+
+   toktype = tsearch2_yylex();
+   *tokout = token;
+   *lenout = tokenlen;
+
+   PG_RETURN_INT32(toktype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+/*
+ * End function of the default parser: finish the current parse.
+ * Argument 0 is not used.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   end_parse();
+
+   PG_RETURN_VOID();
+}
+
+#define LEAVETOKEN(x)  ( (x)==12 ) /* NOTE(review): the numbers in these macros are token type ids, presumably those of wordparser/deflex.h -- confirm */
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 ) /* types that prsd_headline marks with "replace" */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) ) /* types that prsd_headline does not count as words */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) ) /* types that prsd_headline tries not to end a headline on */
+
+typedef struct { /* window of headline words handed to checkcondition_HL() */
+   HLWORD  *words; /* first word of the window */
+   int len; /* number of words in the window */
+} hlCheck;
+
+/*
+ * TS_execute() callback: true when some word of the window described by
+ * checkval (an hlCheck) is attached to query item "val".
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window = (hlCheck*)checkval;
+   int idx = 0;
+
+   while ( idx < window->len ) {
+       if ( window->words[idx].item == val )
+           return true;
+       idx++;
+   }
+   return false;
+}
+
+
+/*
+ * Find the next "cover": a span of words, starting at or after *p, for
+ * which the query is true.
+ *
+ * On entry *p is the first word position to consider.  On success the
+ * function returns true with *p and *q set to the first and last word of
+ * the cover.  It returns false when no cover is found.
+ *
+ * The loop conditions over query->size and prs->curwords were restored;
+ * the field accesses had been lost from the text.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* right edge: the farthest "first occurrence at or after pos" of any operand */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   /* *q still 0 is treated as "no operand found" */
+   if ( *q==0 )
+       return false;
+
+   /* left edge: the nearest "last occurrence in pos..*q" of any operand */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* the span does not satisfy the query: retry one word further */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+/*
+ * Headline function of the default parser.
+ *
+ * Arguments: 0 - HLPRSTEXT with the parsed document, 1 - options text or
+ * NULL, 2 - query.  Recognised options (case-insensitive keys): MaxWords,
+ * MinWords, ShortWord, StartSel, StopSel.
+ *
+ * Among the covers reported by hlCover() the function chooses the span of
+ * words to show and marks every word of it (in, selected, skip, replace).
+ * If there is no cover, the first words of the document are used.  On
+ * return prs->startsel/stopsel and their lengths are always set.  Returns
+ * the prs pointer.
+ *
+ * Three loop headers/conditions and the default "<b>"/"</b>" selection
+ * markers were restored here; their "<...>" parts had been lost from the
+ * text.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* from opt + start and end tag */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this cover if it is the first one, if it holds more query
+        * words and ends on a good token, or if it ends on a good token
+        * while the best one so far does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: show the first min_words words of the document */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+           if ( CList.list[i].map ) {
+               for(j=0;j
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+                free(CList.list);
+   }
+        memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * qsort/bsearch comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly: Oid is unsigned, so returning the
+ * difference converted to int yields the wrong sign once two ids are more
+ * than INT_MAX apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida < idb )
+       return -1;
+   return ( ida > idb ) ? 1 : 0;
+}
+
+/*
+ * Return the cached description of configuration "id", loading it with
+ * init_cfg() on first use.
+ *
+ * The cache array is kept sorted by id.  Adding a configuration may move
+ * the array (realloc) and re-sorts it, so a pointer obtained from an
+ * earlier call may not refer to the same configuration after a new one has
+ * been loaded.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo key;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   key.id=id;
+   if ( CList.len != 0 ) {
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance: load it; CList.last_cfg is NULL from here on */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+
+   /*
+    * Fill the free slot, but make it visible (len, last_cfg) only after
+    * init_cfg() has returned, so that a failed load does not leave a
+    * half-initialised entry reachable through the cache.
+    */
+   init_cfg(id, &(CList.list[CList.len]));
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+
+   /* qsort changed order, so look the new entry up again */
+   CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return CList.last_cfg;
+}
+
+
+/*
+ * Translate a configuration name into its Oid.
+ *
+ * The name -> id map of the configuration cache is consulted first; on a
+ * miss the oid is read from pg_ts_cfg through SPI and added to the map.
+ * An error is raised when no configuration has that name or the oid is
+ * NULL.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum args[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid cfgid;
+
+   cfgid = findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid ) 
+       return cfgid;
+   
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL ) 
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, args, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed == 0 )
+       elog(ERROR, "No tsearch config");
+   else {
+       cfgid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       if ( isnull ) 
+           elog(ERROR, "Null id for tsearch config");
+   }
+
+   SPI_finish();
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * Parse buf[0..buflen) with the parser of configuration cfg and append the
+ * normalised words to prs.
+ *
+ * Each token is offered to the dictionaries mapped to its type, in order;
+ * the first dictionary that returns a (possibly empty) list of lexemes
+ * decides.  The word position advances once per recognised token, also
+ * when the list is empty.  Token types with no entry in the map
+ * (type >= cfg->len) are skipped.  An error is raised for a token of
+ * MAXSTRLEN bytes or more.
+ *
+ * The dictionary loop bound was restored ("cfg->" had been lost from the
+ * text).
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* a token type of 0 ends the token stream */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm) /* an int, not a pointer */
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               /* the lexeme string itself is kept, only the array is freed below */
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Append a token to the headline word list: the array is doubled while it
+ * is full, the new entry is zeroed, and the token's type, length and a
+ * private copy of its buflen bytes are stored.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD  *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) ); 
+   slot->type = (uint8)type;
+   slot->len = buflen; 
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+
+   prs->curwords++;    
+}
+
+/*
+ * Attach the most recently added headline word to every query operand that
+ * equals the normalised lexeme buf[0..buflen).
+ *
+ * The first matching operand is stored in the word itself; for each further
+ * match a copy of the word, flagged "repeated", is appended with that
+ * operand.  The array is grown beforehand so that query->size copies fit.
+ *
+ * Fixes: the pointer to the last word is taken only after the array has
+ * been grown (repalloc may move it, which left the old pointer dangling),
+ * and the loop bound is restored ("query->" had been lost from the text).
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* must follow the repalloc loop above */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) { 
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * Parse buf[0..buflen) with the parser of configuration cfg for headline
+ * generation.
+ *
+ * Every token is appended to prs as is (hladdword).  Tokens whose type has
+ * dictionaries mapped are also normalised, and each resulting lexeme is
+ * matched against the query operands (hlfinditem).  An error is raised for
+ * a token of MAXSTRLEN bytes or more.
+ *
+ * The dictionary loop bound was restored ("cfg->" had been lost from the
+ * text).
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* a token type of 0 ends the token stream */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len ) /* no dictionaries mapped to this type */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm) /* an int, not a pointer */
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * Assemble the headline text from the marked words of prs.
+ *
+ * Words flagged "in" (and neither "skip" nor "repeated") are emitted:
+ * "replace" words as a single space, the others verbatim, wrapped in
+ * startsel/stopsel when "selected".  The word buffers of all non-repeated
+ * entries are freed on the way.  Returns a palloc'ed text.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int cap=128;
+   text *res;
+   char *wp;
+   HLWORD  *cur=prs->words;
+
+   res = (text*)palloc( cap );
+   wp = ((char*)res) + VARHDRSZ;
+
+   for ( ; cur - prs->words < prs->curwords; cur++ ) {
+       /* make room for the word plus both markers */
+       while (  cur->len + prs->stopsellen + prs->startsellen + (wp - ((char*)res)) >= cap ) {
+           int used = wp - ((char*)res);
+           cap *= 2;
+           res = (text *) repalloc(res, cap);
+           wp = ((char*)res) + used;
+       }
+
+       if ( cur->in && !cur->skip && !cur->repeated ) {
+           if ( cur->replace ) {
+               *wp++ = ' ';
+           } else {
+               if (cur->selected) {
+                   memcpy(wp, prs->startsel, prs->startsellen);
+                   wp += prs->startsellen;
+               }
+               memcpy(wp, cur->word, cur->len);
+               wp += cur->len;
+               if (cur->selected) {
+                   memcpy(wp, prs->stopsel, prs->stopsellen);
+                   wp += prs->stopsellen;
+               }
+           }
+       }
+
+       /* "repeated" entries share the buffer of the word they copy */
+       if ( !cur->repeated )
+           pfree(cur->word);
+   }
+
+   VARATT_SIZEP(res) = wp - ((char*)res);
+   return res; 
+}
+
+/*
+ * get_currcfg - return the id of the current tsearch configuration.
+ *
+ * If a configuration id is already cached in current_cfg_id it is returned
+ * directly.  Otherwise pg_ts_cfg is searched through SPI for the row whose
+ * locale equals the current LC_CTYPE locale; the oid found is cached and
+ * returned.  Raises ERROR if the plan cannot be prepared, the query fails,
+ * no row matches, or the returned oid is NULL.
+ */
+int  
+get_currcfg(void) {
+   Oid arg[1]={ TEXTOID };
+   const char *curlocale;
+   Datum pars[1];
+   Datum cfgid;
+   bool isnull;
+   int stat;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   /* the plan is prepared once and kept for later calls */
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* a NULL second argument only queries the locale, it does not change it */
+   curlocale = setlocale(LC_CTYPE, NULL);
+   pars[0] = PointerGetDatum( char2text((char*)curlocale) );
+   stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1);
+
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed == 0 )
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   /* check the null flag before caching, as name2id_cfg does */
+   cfgid = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull);
+   if ( isnull )
+       elog(ERROR, "Null id for tsearch config");
+   current_cfg_id = DatumGetObjectId( cfgid );
+
+   pfree(DatumGetPointer(pars[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * set_curcfg(oid) - SQL-callable: make the given configuration current.
+ *
+ * findcfg() is called for its side effect only (its result is discarded);
+ * presumably it raises an error for an unknown id before the id is stored
+ * -- TODO confirm against init_cfg.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+        findcfg(PG_GETARG_OID(0));
+        current_cfg_id=PG_GETARG_OID(0);
+        PG_RETURN_VOID();
+}
+                
+/*
+ * set_curcfg_byname(text) - SQL-callable: make the named configuration
+ * current.  The name is resolved with name2id_cfg() and the work is
+ * delegated to set_curcfg(); the delegate's return value is ignored.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+   
+        DirectFunctionCall1(
+                set_curcfg,
+                ObjectIdGetDatum( name2id_cfg(name) )
+        );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();      
+}       
+
+/*
+ * show_curcfg() - SQL-callable: return the oid of the current
+ * configuration as reported by get_currcfg().
+ */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   PG_RETURN_OID( get_currcfg() ); 
+}
+
+/*
+ * reset_tsearch() - SQL-callable: reports "TSearch cache cleaned" through
+ * ts_error() at NOTICE level.
+ * NOTE(review): no cache is cleared in this function itself; presumably
+ * ts_error() performs the reset before reporting -- confirm in its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}
+                   if ( CList.list[i].map[j].dict_id )
+                       free(CList.list[i].map[j].dict_id);
+               free( CList.list[i].map );
+           }
+                free(CList.list);
+   }
+        memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * comparecfg - qsort/bsearch comparator ordering TSCfgInfo entries by id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is unsigned,
+ * so the difference converted to int has the wrong sign whenever the two
+ * ids are more than INT_MAX apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+   Oid ida = ((const TSCfgInfo*)a)->id;
+   Oid idb = ((const TSCfgInfo*)b)->id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * findcfg - return the cached TSCfgInfo for configuration `id`, loading it
+ * with init_cfg() on first use.
+ *
+ * Lookup order: the most recently returned entry, then a binary search of
+ * the id-sorted cache, then a fresh load appended to the cache (the array
+ * is grown with realloc when full and re-sorted afterwards).
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+   TSCfgInfo *newcfg;
+
+   /* last used cfg */
+   if ( CList.last_cfg && CList.last_cfg->id==id )
+       return CList.last_cfg;
+
+   /* already used cfg */
+   if ( CList.len != 0 ) {
+       TSCfgInfo key;
+       key.id=id;
+       CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+       if ( CList.last_cfg != NULL )
+           return CList.last_cfg;
+   }
+
+   /* last chance */
+   if ( CList.len==CList.reallen ) {
+       TSCfgInfo *tmp;
+       int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+       tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+       if ( !tmp ) 
+           ts_error(ERROR,"No memory");
+       CList.reallen=reallen;
+       CList.list=tmp;
+   }
+
+   /*
+    * Do not publish the new slot through last_cfg until it is fully
+    * initialized: if init_cfg() raises an error, last_cfg must not be left
+    * pointing at an uninitialized entry beyond CList.len.
+    */
+   CList.last_cfg=NULL;
+   newcfg=&(CList.list[CList.len]);
+   init_cfg(id, newcfg);
+   CList.len++;
+   qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+   return findcfg(id); /* qsort changed order!! */
+}
+
+
+/*
+ * name2id_cfg - translate a configuration name into its oid.
+ *
+ * The name-to-id map kept in CList is consulted first; on a miss the oid
+ * is read from pg_ts_cfg through SPI and remembered in the map.  Raises
+ * ERROR when no row matches or the oid comes back NULL.
+ */
+Oid
+name2id_cfg(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum values[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid cfgid;
+
+   /* cached answer from an earlier lookup */
+   cfgid=findSNMap_t( &(CList.name2id_map), name );
+   if ( cfgid != 0 )
+       return cfgid;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, values, " ", 1);
+   if ( rc < 0 )
+       elog (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed == 0 )
+       elog(ERROR, "No tsearch config");
+
+   cfgid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull )
+       elog(ERROR, "Null id for tsearch config");
+
+   SPI_finish();
+   /* remember the mapping for the next caller */
+   addSNMap_t( &(CList.name2id_map), name, cfgid );
+   return cfgid;
+}
+
+
+/*
+ * parsetext_v2 - tokenize buf with the configuration's parser and append
+ * the normalized lexemes to prs->words.
+ *
+ * For each token the dictionaries mapped to its type are tried in order;
+ * the first one that returns a non-NULL result decides the outcome (an
+ * empty result means the token produces no words).  prs->pos is advanced
+ * once per recognized token and recorded, limited by LIMITPOS, in every
+ * word produced from it.  The strings returned by the dictionary are
+ * stored in prs->words without copying; only the array holding them is
+ * freed here.  Raises ERROR for a token of MAXSTRLEN bytes or more.
+ */
+void 
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* a token type of 0 ends the loop */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+
+       if ( type >= cfg->len ) /* skip this type of lexem */
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* the length is an integer argument, so pass it as int4 */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           prs->pos++; /*set pos*/
+
+           while( *ptr ) {
+               if (prs->curwords == prs->lenwords) {
+                   prs->lenwords *= 2;
+                   prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+               }
+
+               prs->words[prs->curwords].len = strlen(*ptr);
+               prs->words[prs->curwords].word = *ptr;
+               prs->words[prs->curwords].alen = 0;
+               prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+               ptr++;
+               prs->curwords++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * hladdword - append one raw token to prs->words.
+ *
+ * The array is doubled until there is room, the new entry is zeroed, and
+ * the token bytes are copied into a freshly palloc'd buffer (the copy is
+ * not NUL-terminated; the length is kept in the entry).
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+   HLWORD *slot;
+
+   while (prs->curwords >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /* take the slot address only after the array has stopped moving */
+   slot = &(prs->words[prs->curwords]);
+   memset( slot, 0, sizeof(HLWORD) );
+   slot->type = (uint8)type;
+   slot->len = buflen;
+   slot->word = palloc(buflen);
+   memcpy(slot->word, buf, buflen);
+   prs->curwords++;
+}
+
+/*
+ * hlfinditem - link the most recently added word to every query operand
+ * equal to the normalized lexeme buf/buflen.
+ *
+ * The first matching operand is stored in the word's own item field.  For
+ * each further match a copy of the word entry is appended with that
+ * operand and the "repeated" flag set; the copy shares the word buffer.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+   int i;
+   ITEM    *item=GETQUERY(query);
+   HLWORD  *word;
+
+   /* reserve room for the worst case of one copy per query item */
+   while (prs->curwords + query->size >= prs->lenwords) {
+       prs->lenwords *= 2;
+       prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+   }
+
+   /*
+    * Take the address only after the array has been grown: repalloc may
+    * move it, which would leave a pointer obtained earlier dangling.
+    */
+   word=&( prs->words[prs->curwords-1] );
+
+   for(i=0; i<query->size; i++) {
+       if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+           if ( word->item ) {
+               memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+               prs->words[prs->curwords].item=item;
+               prs->words[prs->curwords].repeated=1;
+               prs->curwords++;
+           } else 
+               word->item=item;    
+       }
+       item++;
+   }
+}
+
+/*
+ * hlparsetext - tokenize buf for highlighting.
+ *
+ * Every token is recorded verbatim with hladdword().  Tokens whose type is
+ * covered by the configuration are then normalized by the first mapped
+ * dictionary that returns a non-NULL result, and each normalized form is
+ * matched against the query operands with hlfinditem().  The normalized
+ * strings and their array are freed here.  Raises ERROR for a token of
+ * MAXSTRLEN bytes or more.
+ */
+void 
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+   int type, lenlemm, i;
+   char    *lemm=NULL;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   prsobj->prs=(void*)DatumGetPointer(
+       FunctionCall2(
+           &(prsobj->start_info),
+           PointerGetDatum(buf),
+           Int32GetDatum(buflen)
+       )
+   );
+
+   /* a token type of 0 ends the loop */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prsobj->getlexeme_info),
+           PointerGetDatum(prsobj->prs),
+           PointerGetDatum(&lemm),
+           PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+       if ( lenlemm >= MAXSTRLEN )
+           elog(ERROR, "Word is too long");
+
+       /* keep the raw token even if no dictionary handles its type */
+       hladdword(prs,lemm,lenlemm,type);
+
+       if ( type >= cfg->len ) 
+           continue; 
+
+       for(i=0;i<cfg->map[type].len;i++) {
+           DictInfo    *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+           char    **norms, **ptr;
+   
+           /* the length is an integer argument, so pass it as int4 */
+           norms = ptr = (char**)DatumGetPointer(
+               FunctionCall3(
+                   &(dict->lexize_info),
+                   PointerGetDatum(dict->dictionary),
+                   PointerGetDatum(lemm),
+                   Int32GetDatum(lenlemm)
+               )
+           );
+           if ( !norms ) /* dictionary doesn't know this lexem */
+               continue;
+
+           while( *ptr ) {
+               hlfinditem(prs,query,*ptr,strlen(*ptr));
+               pfree(*ptr);
+               ptr++;
+           }
+           pfree(norms);
+           break; /* lexem already normalized or is stop word*/
+       }
+   }
+
+   FunctionCall1(
+       &(prsobj->end_info),
+       PointerGetDatum(prsobj->prs)
+   );
+}
+
+/*
+ * genhl - render the highlighted text from a parsed document.
+ *
+ * Emits, in order, every word flagged "in" that is neither "skip" nor
+ * "repeated": a single space when the word is flagged "replace",
+ * otherwise the word itself, wrapped in startsel/stopsel when it is
+ * "selected".  The word buffers of non-repeated entries are freed as a
+ * side effect.  Returns a palloc'd text value.
+ */
+text* 
+genhl(HLPRSTEXT * prs) {
+   int capacity=128;
+   text *result=(text*)palloc( capacity );
+   char *cursor=((char*)result) + VARHDRSZ;
+   HLWORD *cur;
+
+   for( cur=prs->words; cur - prs->words < prs->curwords; cur++ ) {
+       /* make sure the word and both markers fit, doubling as needed */
+       while (  cur->len + prs->stopsellen + prs->startsellen + (cursor - ((char*)result)) >= capacity ) {
+           int used = cursor - ((char*)result);
+
+           capacity*= 2;
+           result = (text *) repalloc(result, capacity);
+           cursor=((char*)result) + used;
+       }
+
+       if ( cur->in && !cur->skip && !cur->repeated ) {
+           if ( cur->replace ) {
+               *cursor++=' ';
+           } else if ( cur->selected ) {
+               memcpy(cursor,prs->startsel,prs->startsellen);
+               cursor+=prs->startsellen;
+               memcpy(cursor,cur->word,cur->len);
+               cursor+=cur->len;
+               memcpy(cursor,prs->stopsel,prs->stopsellen);
+               cursor+=prs->stopsellen;
+           } else {
+               memcpy(cursor,cur->word,cur->len);
+               cursor+=cur->len;
+           }
+       }
+
+       /* repeated entries share the buffer of the entry they were copied from */
+       if ( !cur->repeated )
+           pfree(cur->word);
+   }
+
+   VARATT_SIZEP(result)=cursor - ((char*)result);
+   return result;
+}
+
+/*
+ * get_currcfg - return the id of the current tsearch configuration.
+ *
+ * If a configuration id is already cached in current_cfg_id it is returned
+ * directly.  Otherwise pg_ts_cfg is searched through SPI for the row whose
+ * locale equals the current LC_CTYPE locale; the oid found is cached and
+ * returned.  Raises ERROR if the plan cannot be prepared, the query fails,
+ * no row matches, or the returned oid is NULL.
+ */
+int  
+get_currcfg(void) {
+   Oid arg[1]={ TEXTOID };
+   const char *curlocale;
+   Datum pars[1];
+   Datum cfgid;
+   bool isnull;
+   int stat;
+
+   if ( current_cfg_id > 0 )
+       return current_cfg_id;
+
+   SPI_connect();
+   /* the plan is prepared once and kept for later calls */
+   if ( !plan_getcfg_bylocale ) {
+       plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) );
+       if ( !plan_getcfg_bylocale )
+           elog(ERROR, "SPI_prepare() failed");
+   }
+
+   /* a NULL second argument only queries the locale, it does not change it */
+   curlocale = setlocale(LC_CTYPE, NULL);
+   pars[0] = PointerGetDatum( char2text((char*)curlocale) );
+   stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1);
+
+   if ( stat < 0 )
+       elog (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed == 0 )
+       elog(ERROR,"Can't find tsearch config by locale");
+
+   /* check the null flag before caching, as name2id_cfg does */
+   cfgid = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull);
+   if ( isnull )
+       elog(ERROR, "Null id for tsearch config");
+   current_cfg_id = DatumGetObjectId( cfgid );
+
+   pfree(DatumGetPointer(pars[0]));
+   SPI_finish();
+   return current_cfg_id;
+}
+
+/*
+ * set_curcfg(oid) - SQL-callable: make the given configuration current.
+ *
+ * findcfg() is called for its side effect only (its result is discarded);
+ * presumably it raises an error for an unknown id before the id is stored
+ * -- TODO confirm against init_cfg.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+        findcfg(PG_GETARG_OID(0));
+        current_cfg_id=PG_GETARG_OID(0);
+        PG_RETURN_VOID();
+}
+                
+/*
+ * set_curcfg_byname(text) - SQL-callable: make the named configuration
+ * current.  The name is resolved with name2id_cfg() and the work is
+ * delegated to set_curcfg(); the delegate's return value is ignored.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+   
+        DirectFunctionCall1(
+                set_curcfg,
+                ObjectIdGetDatum( name2id_cfg(name) )
+        );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();      
+}       
+
+/*
+ * show_curcfg() - SQL-callable: return the oid of the current
+ * configuration as reported by get_currcfg().
+ */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+   PG_RETURN_OID( get_currcfg() ); 
+}
+
+/*
+ * reset_tsearch() - SQL-callable: reports "TSearch cache cleaned" through
+ * ts_error() at NOTICE level.
+ * NOTE(review): no cache is cleared in this function itself; presumably
+ * ts_error() performs the reset before reporting -- confirm in its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+   ts_error(NOTICE,"TSearch cache cleaned");
+   PG_RETURN_VOID(); 
+}
diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h

new file mode 100644 (file)

index 0000000..01006c1
--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.h
@@ -0,0 +1,68 @@
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* List of dictionaries mapped to one token type. */
+typedef struct {
+   int len;                /* number of entries in dict_id */
+   Datum   *dict_id;       /* dictionary ids; read with DatumGetObjectId() */
+} ListDictionary;
+
+/* One loaded configuration. */
+typedef struct {
+   Oid id;                 /* configuration id; sort/search key of the cache */
+   Oid prs_id;             /* parser id, passed to findprs() */
+   int len;                /* number of entries in map; token types >= len are skipped */
+   ListDictionary  *map;   
+}  TSCfgInfo;
+
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/* One normalized word produced by parsetext_v2(). */
+typedef struct {
+        uint16          len;
+   union {
+       uint16      pos;    /* parsetext_v2 stores LIMITPOS(position) here */
+       uint16      *apos;
+   } pos;
+        char       *word;
+   uint32  alen;           /* set to 0 by parsetext_v2 */
+}       WORD;
+   
+/* Growable array of WORDs plus the running token position. */
+typedef struct {
+        WORD       *words;
+        int4            lenwords;  /* allocated entries */
+        int4            curwords;  /* used entries */
+   int4        pos;
+}       PRSTEXT;
+
+/*
+ * One token of a document being highlighted.  The flag bits are read by
+ * genhl(): a word is output only if "in" is set and "skip"/"repeated" are
+ * clear; "replace" outputs a space instead; "selected" wraps the word in
+ * the start/stop markers.
+ * NOTE(review): len is uint16 while hladdword() receives an int4 length;
+ * presumably the MAXSTRLEN check in the callers keeps it in range -- confirm.
+ */
+typedef struct {
+        uint16    len;
+   uint8    selected:1,
+         in:1,
+         skip:1,
+         replace:1,
+         repeated:1;
+   uint8   type;           /* token type as returned by the parser */
+        char      *word;   /* raw token bytes, not NUL-terminated */
+   ITEM      *item;        /* matching query operand, or NULL */
+}       HLWORD;
+   
+/* Growable array of HLWORDs plus the highlight markers. */
+typedef struct {
+        HLWORD       *words;
+        int4            lenwords;
+        int4            curwords;
+        char           *startsel;
+        char            *stopsel;
+        int2            startsellen;
+        int2            stopsellen;
+}       HLPRSTEXT;
+
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int  get_currcfg(void);
+
+#endif
diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c

new file mode 100644 (file)

index 0000000..9099981
--- /dev/null
+++ b/contrib/tsearch2/ts_stat.c
@@ -0,0 +1,412 @@
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+/*
+ * tsstat_in - input function for tsstat.  The input argument is not read:
+ * the result is always an empty statistic (header only, size 0).
+ */
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum           tsstat_in(PG_FUNCTION_ARGS);
+Datum           
+tsstat_in(PG_FUNCTION_ARGS) {
+   tsstat *stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+   PG_RETURN_POINTER(stat);
+}
+
+/*
+ * tsstat_out - output function for tsstat; not implemented, always raises
+ * ERROR.  The return statement follows the elog(ERROR) call.
+ */
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum           tsstat_out(PG_FUNCTION_ARGS);
+Datum           
+tsstat_out(PG_FUNCTION_ARGS) {
+   elog(ERROR,"Unimplemented");
+   PG_RETURN_NULL();
+}
+
+/*
+ * SEI_realloc - grow an array of WordEntry pointers.
+ *
+ * A NULL array or a zero *len yields a fresh 8-slot array; otherwise the
+ * capacity in *len is doubled and the array reallocated.  Returns the
+ * (possibly moved) array; *len holds the new capacity.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+   if ( *len!=0 && in!=NULL ) {
+       *len *= 2;
+       return repalloc( in, sizeof(WordEntry*)* (*len) );
+   }
+
+   *len=8;
+   return palloc( sizeof(WordEntry*)* (*len) );
+}
+
+/*
+ * compareStatWord - order a statistic entry against a tsvector word:
+ * shorter strings sort first, equal lengths are ordered by strncmp over
+ * the string bytes.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp( STATSTRPTR(stat) + a->pos, STRPTR(txt) + b->pos, a->len );
+}
+
+/*
+ * formstat - build a new statistic from `stat` plus `len` new words.
+ *
+ * entry[0 .. len-1] point at words of txt that are not yet present in
+ * stat.  The result holds the old entries and the new ones (ndoc = 1,
+ * nentry = number of positions, at least 1) in compareStatWord() order.
+ * The old string data is copied first and the new strings are appended
+ * after it.  A single new word is placed by binary search; several new
+ * words are merged with the old entry array.  Returns a palloc'd tsstat;
+ * the input stat is not freed.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+   tsstat  *newstat;
+   uint32 totallen, nentry;
+   uint32  slen=0;
+   WordEntry   **ptr=entry;
+   char    *curptr;
+   StatEntry   *sptr,*nptr;
+
+   /* string bytes contributed by the new words */
+   while(ptr-entry<len) {
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   /* old strings keep their offsets; new strings are appended behind them */
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       /* one new word: find its slot by binary search and splice it in */
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       /* several new words: merge them with the old entries */
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       /* leftover old entries, if any */
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       /* leftover new words, if any */
+       while(ptr-entry<len) {
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+/*
+ * ts_accum(tsstat, tsvector) - fold one document into the statistic.
+ *
+ * Words already present in the statistic get ndoc incremented and nentry
+ * increased by their number of positions (at least 1); this is done in
+ * place on the incoming statistic.  Words not yet present are collected
+ * and added with formstat(), in which case a new statistic is returned.
+ * A NULL statistic is treated as empty; a NULL or empty tsvector returns
+ * the statistic unchanged.
+ *
+ * Two strategies are used to match words: a linear merge of both sorted
+ * sequences when the statistic is smaller than 100 times the document,
+ * otherwise a binary search per document word.
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt;
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /*
+    * The NULL test must come before detoasting: detoasting dereferences
+    * the datum, which is not valid for a NULL argument.
+    */
+   if ( PG_ARGISNULL(1) || PG_GETARG_POINTER(1)==NULL )
+       PG_RETURN_POINTER(stat);
+
+   txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+
+   /* simple check of correctness */
+   if ( txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* document words sorting after every known word are all new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* the search range collapses only when the word was not found */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-query state of the set-returning functions: a private copy of the
+ * statistic being listed and the index of the next entry to return.
+ */
+typedef struct {
+   uint32  cur;
+   tsstat *stat;   /* holds a tsstat (filled from one in ts_setup_firstcall) */
+} StatStorage;
+
+/*
+ * ts_setup_firstcall - first-call setup shared by the set-returning
+ * functions.  Inside the multi-call memory context it copies the
+ * statistic into a new StatStorage, stores it in user_fctx, and prepares
+ * the tuple slot and input metadata for the relation type "statinfo".
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   StatStorage     *st;
+   
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   st=palloc( sizeof(StatStorage) );
+   st->cur=0;
+   /* private copy, allocated in the multi-call context */
+   st->stat=palloc( stat->len );
+   memcpy(st->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+
+/*
+ * ts_process_call - produce the next result row of a statistic listing.
+ *
+ * Returns a tuple datum built from three C strings (word, ndoc, nentry)
+ * for the entry at st->cur and advances the cursor.  When all entries
+ * have been returned it frees the stored statistic and the storage itself
+ * and returns (Datum)0, which the callers treat as end of set.
+ * NOTE(review): the uint32 counters are formatted with "%d" -- confirm the
+ * column types of "statinfo" before changing the format.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage     *st;
+   st=(StatStorage*)funcctx->user_fctx;
+
+   if ( st->cur < st->stat->size ) {
+       Datum result;
+       char* values[3];
+       char    ndoc[16];
+       char    nentry[16];
+       StatEntry *entry=STATPTR(st->stat) + st->cur;
+       HeapTuple    tuple;
+
+       values[1]=ndoc;
+       sprintf(ndoc,"%d",entry->ndoc);
+       values[2]=nentry;
+       sprintf(nentry,"%d",entry->nentry);
+       /* the stored word is not NUL-terminated: copy it and terminate it */
+       values[0]=palloc( entry->len+1 );
+       memcpy( values[0], STATSTRPTR(st->stat)+entry->pos, entry->len);
+       (values[0])[entry->len]='\0';
+
+       tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+       result = TupleGetDatum(funcctx->slot, tuple);
+
+       pfree(values[0]);
+       st->cur++;
+       return result;  
+   } else {
+       pfree(st->stat);
+       pfree(st);
+   }
+   
+   return (Datum)0;
+}
+
+/*
+ * ts_accum_finish(tsstat) - set-returning function that lists the entries
+ * of the given statistic, one row per call, via ts_process_call().
+ */
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* (Datum)0 from ts_process_call means there are no more rows */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* type oid of tsvector, looked up on first use by get_ti_Oid() */
+static Oid tiOid=InvalidOid;
+
+/*
+ * get_ti_Oid - look up the oid of the type named 'tsvector' in pg_type
+ * and store it in tiOid.  Uses SPI_exec, so the caller must already be
+ * connected to SPI.  Raises ERROR if the query fails, returns no row, or
+ * yields a NULL/invalid oid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /* SPI_processed is unsigned: test for "no rows" before touching vals[0] */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( isnull || tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * ts_stat_sql - run the given SQL text and accumulate statistics over the
+ * tsvector values it returns.
+ *
+ * The query must produce exactly one column of type tsvector.  Rows are
+ * fetched through a cursor in batches of 100 and each non-NULL value is
+ * folded into the statistic with ts_accum().  Uses SPI calls, so the
+ * caller must already be connected to SPI.  Returns the resulting tsstat.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   /* validate the result shape once, on the first batch */
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum returns either the same object or a new one */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+/*
+ * ts_stat(text) - set-returning function: runs the SQL query given as
+ * text, accumulates word statistics over its tsvector column with
+ * ts_stat_sql(), and returns the entries one row per call.
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       /* copies stat into the multi-call context before SPI_finish() */
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* (Datum)0 from ts_process_call means there are no more rows */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/* One word of a statistic. */
+typedef struct {
+   uint32  len;    /* length of the word in bytes */
+   uint32  pos;    /* offset of the word from STATSTRPTR() */
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Statistic value: a header followed by `size` StatEntry records and then
+ * the string data they point into (see the STAT* macros below).
+ */
+typedef struct {
+   int4        len;        /* total size in bytes, header included */
+   int4        size;       /* number of StatEntry records */
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat.  Arguments are parenthesized so that
+ * expressions can be passed safely, and the header fields are read
+ * through tsstat itself.
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* total bytes needed for x entries plus lenstr bytes of string data */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* first StatEntry record */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* start of the string data, right behind the entry array */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* number of bytes of string data */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+-- Registry of dictionaries, keyed by dict_name.
+-- The inserts later in this script fill dict_init / dict_lexize with the
+-- pg_proc oids of each dictionary's *_init and *_lexize functions, and
+-- dict_initoption with either NULL or a stop-word file path
+-- (presumably the text argument given to the init function -- confirm).
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+-- Registry of parsers, keyed by prs_name.
+-- Each prs_* oid column is filled from pg_proc by the insert for the
+-- 'default' parser further down (prsd_start, prsd_getlexeme, prsd_end,
+-- prsd_headline, prsd_lextype).
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+-- One row per named configuration: which parser it uses (prs_name) and an
+-- optional locale string ('C', 'ru_RU.KOI8-R' or NULL in the rows below).
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+-- For each (configuration, token alias) pair, the array of dictionary names
+-- to apply; populated by the COPY block below.
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance ranking
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+-- Default GiST operator class for tsvector columns: the index stores
+-- gtsvector keys (STORAGE) and supports the tsvector @@ tsquery operator;
+-- RECHECK marks the index result as needing re-evaluation against the row.
+-- Functions 1-7 are the GiST support functions declared above.
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+-- stat(text): set-returning wrapper around the C function ts_stat;
+-- yields statinfo rows (word, ndoc, nentry).
+-- NOTE(review): the text argument is presumably an SQL query producing
+-- tsvector values -- confirm against ts_stat_sql in ts_stat.c.
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, plus an array giving each lexeme's position in the string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include <ctype.h>             /* tolower, isdigit, isspace */
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort comparator for WordEntryPos: orders by the pos field, ascending.
+ * Equal positions compare as 0 so that the comparator is consistent
+ * (cmp(a,b) and cmp(b,a) must not both claim "greater"); duplicates are
+ * merged afterwards by uniquePos().
+ */
+static int
+comparePos(const void *a, const void *b) {
+   if ( ((WordEntryPos *) a)->pos == ((WordEntryPos *) b)->pos )
+       return 0;
+   return ( ((WordEntryPos *) a)->pos > ((WordEntryPos *) b)->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ * When the same position occurs several times the highest weight wins.
+ * Compaction stops early once MAXNUMPOS entries are kept or a kept
+ * position equals MAXENTRYPOS-1.
+ * Returns the number of entries left at the front of a[].
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *ptr, *res;
+
+   res=a;
+   /* nothing to sort or merge; also keeps an empty array from reporting 1 entry */
+   if (l<=1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   ptr = a + 1;
+   while (ptr - a < l) {
+       if ( ptr->pos != res->pos ) {
+           /* new distinct position: append it after the last kept one */
+           res++;
+           res->pos = ptr->pos;
+           res->weight = ptr->weight;
+           if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
+               break;
+       } else if ( ptr->weight > res->weight )
+           /* duplicate position: keep the larger weight */
+           res->weight = ptr->weight;
+       ptr++;
+   }
+   return res + 1 - a;
+}
+
+/* text buffer that entry.pos offsets refer to while compareentry() runs */
+static char *BufferStr;
+
+/*
+ * qsort comparator for WordEntryIN: shorter lexemes sort first; lexemes of
+ * equal length are ordered by strncmp over their bytes in BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *lhs = (const WordEntryIN *) a;
+   const WordEntryIN *rhs = (const WordEntryIN *) b;
+
+   if ( lhs->entry.len != rhs->entry.len )
+       return ( lhs->entry.len > rhs->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[lhs->entry.pos],
+                  &BufferStr[rhs->entry.pos],
+                  lhs->entry.len);
+}
+
+/*
+ * Sort the l parsed entries in a[] (text lives in buf) and merge entries
+ * with identical lexemes, concatenating and de-duplicating their position
+ * lists.  Slot 0 of each pos array, read as uint16, holds the number of
+ * positions; the positions themselves start at pos[1].
+ *
+ * *outbuflen receives the space needed for lexemes and positions.
+ * NOTE(review): with more than one entry the size is ADDED to whatever
+ * *outbuflen held on entry (no reset), and the counter slot of each
+ * position array is not counted; with a single entry that has no
+ * positions *outbuflen is left untouched.  tsvector_in relies on the
+ * resulting slack -- change both together.
+ *
+ * Returns the number of distinct entries left at the front of a[].
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   res = a;
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           /* single entry: just de-duplicate its positions and size it exactly */
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
+       }
+       return l;
+   }
+
+   ptr = a + 1;
+   /* compareentry() reads lexeme bytes through this file-level pointer */
+   BufferStr = buf;
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* ptr starts a new lexeme: finalize the previous one, then keep ptr */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               *outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos);
+           }
+           *outbuflen += SHORTALIGN(res->entry.len);
+           res++;
+           memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* same lexeme again: fold ptr's positions into res */
+           if ( res->entry.haspos ) {
+               /* room for res's counter slot plus both position lists */
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* res had no positions yet: adopt ptr's array as-is */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+   /* finalize the last kept entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/*
+ * States of the gettoken_tsvector() scanner:
+ *   WAITWORD     - before a lexeme, skipping spaces
+ *   WAITENDWORD  - inside an unquoted lexeme
+ *   WAITNEXTCHAR - a backslash was seen; the next character is taken literally
+ *   WAITENDCMPLX - inside a single-quoted lexeme
+ *   WAITPOSINFO  - after the closing quote; ':' starts position info
+ *   INPOSINFO    - a digit starting a position is expected
+ *   WAITPOSDELIM - inside/after a position: more digits, a weight letter,
+ *                  ',' for another position, or the end of the token
+ */
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+/*
+ * Double state->word when the write cursor is within one byte of its end,
+ * re-basing state->curpos onto the reallocated buffer.  Expects a variable
+ * named `state` in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * gettoken_tsvector - scan the next lexeme out of state->prsbuf.
+ *
+ * The lexeme is written NUL-terminated into state->word (state->curpos
+ * ends up on the terminator).  Lexemes may be bare or single-quoted, and a
+ * backslash makes the following character literal.  Unless
+ * state->oprisdelim is set, a ':' after the lexeme introduces a
+ * comma-separated list of positions, each optionally followed by a weight
+ * letter (A or '*', B, C, D); the list is stored in state->pos with the
+ * count kept as a uint16 in slot 0, and state->alen is non-zero when such a
+ * list was allocated.
+ *
+ * Returns 1 when a lexeme was produced, 0 at end of input.  Malformed
+ * input is reported with elog(ERROR).
+ *
+ * The <ctype.h> classifiers are always given an unsigned char value:
+ * passing a negative plain char (any 8-bit input byte) is undefined.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               /* escaped character: copy verbatim, then resume the saved state */
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* step over the closing quote before handing the token back */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   /* first position: allocate the array, slot 0 is the counter */
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi reads the whole number here; its remaining digits are skipped in WAITPOSDELIM */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * tsvector_in - input function: parse the cstring argument into a tsvector.
+ *
+ * Tokens are pulled with gettoken_tsvector(); lexeme bytes are collected in
+ * tmpbuf and one WordEntryIN per token in arr.  uniqueentry() then sorts and
+ * merges duplicates, and the result is laid out as the WordEntry array
+ * followed by the lexemes, each followed by its position data if any.
+ * Raises an error for over-long words or values.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array when full */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       /* grow the text buffer until the new lexeme fits, keeping cur valid */
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       if (cur - tmpbuf > MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       /* state.alen is non-zero only when the token carried position info */
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   /*
+    * buflen doubles as the in/out size for uniqueentry().
+    * NOTE(review): for more than one entry uniqueentry() adds to the current
+    * buflen instead of starting from zero and does not count the counter
+    * slot of each position array, while the copy loop below copies
+    * count+1 slots.  The leftover tmpbuf capacity presumably covers that;
+    * confirm before changing either side.
+    */
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       /* from here on entry.pos is an offset into the result's string area */
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* counter slot plus all positions follow the aligned lexeme */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * tsvector_length - SQL length(tsvector): the vector's size field,
+ * i.e. the number of entries it stores.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec;
+   int4        nentries;
+
+   vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   nentries = vec->size;
+
+   /* release the detoasted copy, if one was made, before returning */
+   PG_FREE_IF_COPY(vec, 0);
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * tsvector_out - output function: render a tsvector as text.
+ *
+ * Each entry is written as a single-quoted lexeme, entries separated by one
+ * space; an entry with positions gets ":pos[W],pos[W],..." appended, where
+ * W is A, B or C for weights 3, 2, 1 and nothing for weight 0.
+ *
+ * Both the quote and the backslash are escaped with a backslash, because
+ * the input scanner treats a backslash as "take the next character
+ * literally"; without that a lexeme containing a backslash would not
+ * survive an output/input round trip.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   /*
+    * Worst-case size: two quotes and one separator per entry, every lexeme
+    * byte escaped (hence *2), and 7 bytes per position (digits, weight
+    * letter, comma).  The last position of an entry has no comma, which
+    * leaves the byte used by ':'.
+    */
+   lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+   for (i = 0; i < out->size; i++) {
+       lenbuf += ptr[i].len*2 /*for escape */;
+       if ( ptr[i].haspos )
+           lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           /* room for the escape byte is already part of lenbuf */
+           if (*curin == '\'' || *curin == '\\')
+               *curout++ = '\\';
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort comparator for WORD items.
+ *
+ * Shorter words sort first.  Words of equal length are ordered by strncmp
+ * over that length; words with identical text are ordered by ascending
+ * position (this comparator never returns 0 for identical text).
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int         cmp;
+
+   if (wa->len != wb->len)
+       return (wa->len > wb->len) ? 1 : -1;
+
+   cmp = strncmp(wa->word, wb->word, wb->len);
+   if (cmp != 0)
+       return cmp;
+
+   return (wa->pos.pos > wb->pos.pos) ? 1 : -1;
+}
+
+/*
+ * Sort the l words in a[] with compareWORD and merge equal words in place.
+ *
+ * Every surviving entry gets a palloc'd uint16 array in pos.apos: element 0
+ * holds the number of positions stored, elements 1..count hold the
+ * positions (clamped with LIMITPOS).  alen is the allocated length of that
+ * array.  The word strings of merged duplicates are pfree'd.
+ *
+ * Returns the number of unique words left at the front of a[].
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   /* a single word needs no sorting, only conversion to the array form */
+   if (l == 1) {
+       /*
+        * pos.pos is saved before pos.apos is assigned -- presumably the two
+        * share storage; confirm against the declaration of WORD
+        */
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   /* res: last unique entry written so far; ptr: next entry to examine */
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* a new word: move it into the next unique slot */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* a repeat of *res: drop its text and append its position */
+           pfree(ptr->word);
+           /*
+            * stop collecting once MAXNUMPOS-1 positions are stored or the
+            * last stored position is already the clamped maximum
+            */
+           if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               /* double the array when the next slot would not fit */
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
+               res->pos.apos[0]++; 
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * Builds a tsvector from the words collected in prs.  The words are first
+ * de-duplicated with uniqueWORD(); then each word's text is copied into the
+ * string area, followed (when the word has a position array) by a uint16
+ * count and that many WordEntryPos items with weight 0.
+ *
+ * All per-word allocations (word text, position array) and prs->words
+ * itself are pfree'd here; prs->curwords is updated to the unique count.
+ * Raises ERROR "Value is too big" when a word's offset does not fit into
+ * the 20-bit WordEntry.pos field.
+ *
+ * Returns the newly palloc'd tsvector.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   /* first pass: compute the size of the string/position area */
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   /* second pass: fill the entry array and the string/position area */
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /*
+        * WordEntry.pos is a 20-bit field, so the largest offset it can hold
+        * is MAXSTRPOS-1; an offset equal to MAXSTRPOS would wrap to 0.
+        */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+
+           ptr->haspos=1;
+           /* apos[0] is the position count, apos[1..] the positions */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text): parse the text with the configuration found
+ * by findcfg() and return the resulting tsvector.  Text that yields no
+ * words produces an empty tsvector (size 0).
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *document = PG_GETARG_TEXT_P(1);
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+   PRSTEXT     prs;
+   tsvector   *result;
+
+   /* start with room for 32 words */
+   prs.pos = 0;
+   prs.curwords = 0;
+   prs.lenwords = 32;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(document), VARSIZE(document) - VARHDRSZ);
+   PG_FREE_IF_COPY(document, 1);
+
+   if (prs.curwords == 0)
+   {
+       /* nothing was collected: hand back a header-only value */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   }
+   else
+       result = makevalue(&prs);
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector_name(cfg_name, text): resolve the configuration name with
+ * name2id_cfg() and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text   *cfgname = PG_GETARG_TEXT_P(0);
+   Datum   vector;
+
+   /* the third argument is a dummy; to_tsvector reads arguments 0 and 1 */
+   vector = DirectFunctionCall3(to_tsvector,
+                                Int32GetDatum( name2id_cfg( cfgname ) ),
+                                PG_GETARG_DATUM(1),
+                                (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(vector);
+}
+
+/*
+ * to_tsvector_current(text): to_tsvector() with the configuration id
+ * returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum   vector;
+
+   /* the third argument is a dummy; to_tsvector reads arguments 0 and 1 */
+   vector = DirectFunctionCall3(to_tsvector,
+                                Int32GetDatum( get_currcfg() ),
+                                PG_GETARG_DATUM(0),
+                                (Datum)0);
+
+   PG_RETURN_DATUM(vector);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * TEXTOID.  Returns the oid of the first such candidate, or InvalidOid when
+ * there is none.  Every candidate list cell is pfree'd before returning.
+ */
+static Oid
+findFunc(char *fname) {
+   List               *qualname = makeList1(makeString(fname));
+   FuncCandidateList   cand = FuncnameGetCandidates(qualname, 1);
+   FuncCandidateList   next;
+   Oid                 found = InvalidOid;
+
+   freeList(qualname);
+
+   for (; cand != NULL; cand = next) {
+       /* grab the link first: the cell is released below */
+       next = cand->next;
+
+       /* only the first matching candidate is kept */
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+
+       pfree(cand);
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * Row-level BEFORE INSERT/UPDATE trigger that fills a tsvector column from
+ * one or more character columns of the same row.
+ *
+ * Trigger arguments: tgargs[0] names the tsvector column; each following
+ * argument names either a column (text, varchar or char) to be parsed, or,
+ * when no column of that name exists, a function that is looked up with
+ * findFunc() and applied to the column values that follow it.
+ *
+ * Returns the modified tuple.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* attribute number of the tsvector column to be filled */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /*
+            * not a column name: treat it as a function name.  funcoid is
+            * not reset afterwards, so the function stays in effect for
+            * every column argument that follows.
+            */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user-named function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * when detoasting made no copy, make txt_toasted equal to txt so
+            * that the pfree test after parsing does not fire
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when it is a detoasted copy */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words collected: store an empty (header-only) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One entry of a tsvector's word array, packed into 32 bits:
+ *   haspos - set when position data follows the word's text
+ *   len    - length of the word's text in bytes
+ *   pos    - offset of the word's text from the start of the string area
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* number of distinct values of the 11-bit len and 20-bit pos fields */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One word position, packed into 16 bits: a 2-bit weight and a 14-bit
+ * position number.
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+/* number of distinct values of the 14-bit pos field */
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+/* clamp a position number to the largest value the pos field can hold */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * In-memory layout of a tsvector value:
+ *   len   - total size of the value in bytes
+ *   size  - number of WordEntry items
+ *   data  - the WordEntry array, immediately followed by the string area
+ *           that holds the words' text and their optional position data
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Accessors.  Every macro parameter is parenthesized so that expression
+ * arguments (for example a pointer difference) group as intended.
+ *
+ * DATAHDRSIZE        - size of the len and size fields
+ * CALCDATASIZE(x,l)  - total bytes for x entries plus l bytes of string area
+ * ARRPTR(x)          - start of the WordEntry array
+ * STRPTR(x)          - start of the string area (depends on x->size)
+ * STRSIZE(x)         - size of the string area in bytes
+ * _POSDATAPTR(x,e)   - address of entry e's uint16 position count, which
+ *                      follows the SHORTALIGN'ed text of the word
+ * POSDATALEN(x,e)    - number of positions of entry e, 0 if it has none
+ * POSDATAPTR(x,e)    - first WordEntryPos of entry e
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * A WordEntry paired with a separately allocated position buffer, used
+ * while a tsvector is being assembled.  NOTE(review): the assembling code
+ * reads the first uint16 of pos as the position count -- confirm against
+ * the code that fills this struct.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * State passed to gettoken_tsvector().  NOTE(review): field meanings are
+ * not established by this header; see the tokenizer implementation.
+ */
+typedef struct
+{
+   char       *prsbuf;
+   char       *word;
+   char       *curpos;
+   int4        len;
+   int4        state;
+   int4        alen;
+   WordEntryPos    *pos;
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the tsvector that keeps every word but
+ * drops all position data (haspos is cleared on each entry).
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* the string area only needs room for the (aligned) word texts */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       /* offsets are recomputed: the words are now packed back to back */
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char"): return a copy of the tsvector in which every
+ * position carries the given weight.  The weight letter is case-insensitive:
+ * 'a' -> 3, 'b' -> 2, 'c' -> 1, 'd' -> 0.  Any other letter raises
+ * ERROR "Unknown weight".
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /* tolower() is only defined for unsigned char values (and EOF) */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   /* work on a byte-for-byte copy; only the weight bits are rewritten */
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two word entries: a shorter word sorts before a longer one, and
+ * words of equal length are compared with strncmp over that length.
+ * ptra and ptrb are the string areas that a->pos and b->pos index into.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+   if ( a->len > b->len )
+       return 1;
+   if ( a->len < b->len )
+       return -1;
+
+   return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of entry srcptr (in src) to the position list of
+ * entry destptr (in dest).  Each appended position keeps its weight and is
+ * shifted by maxpos, clamped with LIMITPOS.
+ *
+ * When destptr has no positions yet, its count is initialised to 0; haspos
+ * is set on destptr as soon as at least one position was appended.
+ *
+ * Returns the number of positions appended.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos )
+       *clen=0;
+
+   startlen = *clen;
+   /*
+    * NOTE(review): the loop header was truncated after "i" in this copy of
+    * the source and has been restored; confirm against the upstream file.
+    * Stop when the source is exhausted, when the destination holds
+    * MAXNUMPOS positions, or when its last position is already the clamped
+    * maximum.
+    */
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight;
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1;
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector): merge two tsvectors into one.
+ *
+ * The entries of both inputs are merged in compareEntry() order; a word
+ * present in both inputs yields a single entry.  Positions taken from the
+ * second input are shifted by the largest position found in the first
+ * input (see add_pos).
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /* allocate for the worst case: no word is shared between the inputs */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   /* data: start of the string area for the worst-case entry count */
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   /* merge while both inputs still have entries */
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* in1 positions are copied unchanged, count included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* in2 positions are shifted by maxpos */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same word in both inputs: one entry, positions combined */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count was already copied; add only the new items */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* copy whatever is left of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* copy whatever is left of in2, shifting its positions */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Fix up the header with the real entry count.  STRPTR(out) depends on
+    * out->size, so when shared words reduced the count the string area
+    * must be moved down to follow the shorter entry array.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+--Be careful !!!
+--script drops all indices, triggers and columns with types defined
+--in tsearch2.sql
+
+
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/* remember !!!! LASTNUM is the highest token type number defined below */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/* text and length of the most recently returned token; set by the lexer rules */
+char      *token;
+int            tokenlen;
+/* return the type number of the next token */
+int            tsearch2_yylex(void);
+/* begin scanning a string buffer; the int is passed on as the byte count */
+void       start_parse_str(char *, int);
+/* begin scanning a file handle; the int limits the bytes read, 0 = no limit */
+void       start_parse_fh(FILE *, int);
+/* release the scan buffer set up by one of the start_parse_* functions */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/* Avoid exit() on fatal scanner errors */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* postgres allocation function */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to token */
+char *s     = NULL;  /* to return WHOLE hyphenated-word */
+
+YY_BUFFER_STATE buf = NULL; /* buffer to parse; it need for parse from string */
+
+int lrlimit = -1;  /* for limiting read from filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* for limiting read from filehandle */
+
+/* redefine macro for read limited length */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/* clean up after a parse: drop the saved whole-token copy and the scan buffer */
+void end_parse() {
+   if (s != NULL) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/* begin scanning limit bytes of str, closing any parse still in progress */
+void start_parse_str(char* str, int limit) {
+   if (buf != NULL)
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+/*
+ * begin scanning from a file handle, closing any parse still in progress;
+ * a limit of 0 means unlimited read (lrlimit = -1)
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if (buf != NULL)
+       end_parse();
+
+   if ( limit != 0 )
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+/* saved SPI plan for the pg_ts_parser lookup in init_prs(); prepared on first use */
+static void *plan_getparser=NULL;
+/* parser used by the *_current functions; InvalidOid until one is chosen */
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of parser `id` read from pg_ts_parser.
+ *
+ * The row is fetched through SPI with a plan that is prepared and saved
+ * on first use.  The start, nexttoken, end and headline function oids are
+ * resolved into FmgrInfo structures allocated in TopMemoryContext; the
+ * lextype function oid is stored as is.  *prs is zeroed first.
+ *
+ * Errors are reported through ts_error(ERROR, ...) when the plan cannot
+ * be prepared, the query fails, or no row has the requested oid.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       /* columns are addressed by position, in select-list order */
+       HeapTuple   row = SPI_tuptable->vals[0];
+       TupleDesc   desc = SPI_tuptable->tupdesc;
+       Oid         oid;
+
+       oid=DatumGetObjectId( SPI_getbinval(row, desc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(row, desc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(row, desc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(row, desc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(row, desc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned: %u keeps large oids from printing as negative */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+/* Backend-local cache of parser descriptions, filled on demand by findprs() */
+typedef struct {
+   WParserInfo *last_prs;  /* entry returned by the most recent lookup, or NULL */
+   int     len;            /* number of entries of list in use */
+   int     reallen;        /* number of entries list has room for */
+   WParserInfo *list;      /* malloc'ed array, kept sorted by prs_id for bsearch */
+   SNMap       name2id_map;    /* parser name -> oid cache used by name2id_prs() */
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget every cached parser: release the name map and the entry array,
+ * then zero the whole cache structure.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL ) {
+       free(PList.list);
+   }
+
+   memset(&PList, 0, sizeof(PList));
+}
+
+/*
+ * qsort/bsearch comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is unsigned,
+ * so a difference squeezed into an int has the wrong sign whenever the two
+ * ids are more than INT_MAX apart, which breaks sort order and lookup.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached description of parser `id`, loading it on first use.
+ *
+ * Lookup order: the entry returned last time, then a binary search of the
+ * sorted cache, and finally a fresh load through init_prs().  The cache
+ * array doubles when full (starting at 16 entries) and is re-sorted after
+ * every insertion.
+ */
+WParserInfo *
+findprs(Oid id) {
+   /* same parser as the previous call? */
+   if ( PList.last_prs != NULL && PList.last_prs->prs_id == id )
+       return PList.last_prs;
+
+   /* one of the parsers loaded earlier? */
+   if ( PList.len != 0 ) {
+       WParserInfo probe;
+
+       probe.prs_id = id;
+       PList.last_prs = bsearch(&probe, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs )
+           return PList.last_prs;
+   }
+
+   /* not cached yet: make room for one more entry if needed */
+   if ( PList.reallen == PList.len ) {
+       int         newcap = 16;
+       WParserInfo *grown;
+
+       if ( PList.reallen )
+           newcap = 2*PList.reallen;
+       grown = (WParserInfo*)realloc(PList.list, sizeof(WParserInfo)*newcap);
+       if ( grown == NULL )
+           ts_error(ERROR,"No memory");
+       PList.reallen = newcap;
+       PList.list = grown;
+   }
+
+   /* load into the first free slot, then restore the sort order */
+   PList.last_prs = PList.list + PList.len;
+   init_prs(id, PList.last_prs);
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+
+   /* sorting may have moved the new entry, so look it up again */
+   return findprs(id);
+}
+
+/* saved SPI plan for the name lookup below; prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into its pg_ts_parser oid.
+ *
+ * Answers come from the name map inside PList when possible; otherwise the
+ * table is queried through SPI and the result is remembered in the map.
+ * An unknown name is reported through ts_error(ERROR, ...).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid     argtypes[1]={ TEXTOID };
+   Datum   argvals[1]={ PointerGetDatum(name) };
+   bool    isnull;
+   int     rc;
+   Oid     id;
+
+   id = findSNMap_t( &(PList.name2id_map), name );
+   if ( id )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, argvals, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed == 0 )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   SPI_finish();
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* Per-query state of the token_type* set-returning functions */
+typedef struct {
+   int     cur;        /* index of the next list entry to return */
+   LexDescr    *list;  /* array from the parser's lextype function; an entry with lexid 0 ends it */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type* functions: ask parser `prsid`
+ * for its lexeme type list and prepare tuple building for the "tokentype"
+ * relation type.  Everything is allocated in the multi-call context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo     *prs = findprs(prsid);
+   MemoryContext   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   TypeStorage     *st;
+   TupleDesc       tupdesc;
+
+   st = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
+   );
+   st->cur = 0;
+   funcctx->user_fctx = (void*)st;
+
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+/*
+ * Produce the next (lexid, alias, descr) row for the token_type* functions.
+ * Returns (Datum)0 after the terminating entry is reached; the per-query
+ * state is freed at that point.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *st = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *cur;
+   char        *values[3];
+   char        txtid[16];
+   HeapTuple   tuple;
+   Datum       result;
+
+   /* no list, or sitting on the lexid==0 terminator: finished */
+   if ( st->list == NULL || st->list[st->cur].lexid == 0 ) {
+       if ( st->list )
+           pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   cur = &(st->list[st->cur]);
+   sprintf(txtid, "%d", cur->lexid);
+   values[0] = txtid;
+   values[1] = cur->alias;
+   values[2] = cur->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   /* the strings of this entry are not needed again */
+   pfree(cur->alias);
+   pfree(cur->descr);
+   st->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/* Set-returning function: lexeme types of the parser whose oid is argument 0 */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   result = process_call(funcctx);
+
+   if ( result == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+
+/* Set-returning function: lexeme types of the parser named by argument 0 */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   result = process_call(funcctx);
+
+   if ( result == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: lexeme types of the current parser.  When no
+ * parser has been chosen yet, the one named "default" becomes current.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   result = process_call(funcctx);
+
+   if ( result == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+
+/* Make the parser whose oid is argument 0 the current parser */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid prsid = PG_GETARG_OID(0);
+
+   /* loads the parser into the cache; the lookup reports an error for an unknown oid */
+   findprs(prsid);
+   current_parser_id = prsid;
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+
+/* Make the parser named by argument 0 the current parser */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+   text    *name = PG_GETARG_TEXT_P(0);
+   Datum   prsid = ObjectIdGetDatum( name2id_prs(name) );
+
+   DirectFunctionCall1(set_curprs, prsid);
+
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* One token produced by a parser run */
+typedef struct {
+   int type;       /* lexeme type code returned by the parser's nexttoken function */
+   char    *lexem; /* palloc'ed, NUL-terminated copy of the token text */
+} LexemEntry;
+
+/* Per-query state of the parse* set-returning functions */
+typedef struct {
+   int cur;            /* index of the next entry to return */
+   int len;            /* allocated entries while collecting, collected count afterwards */
+   LexemEntry  *list;  /* the collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse* functions: run parser `prsid`
+ * over the whole of `txt`, keep a copy of every token in the multi-call
+ * context, and prepare tuple building for the "tokenout" relation type.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, int prsid, text *txt) {
+   WParserInfo     *prs = findprs(prsid);
+   MemoryContext   saved;
+   PrsStorage      *st;
+   LexemEntry      *items;
+   TupleDesc       tupdesc;
+   char            *lex = NULL;
+   int             llen = 0;
+   int             type = 0;
+   int             cap = 16;
+   int             used = 0;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st = (PrsStorage*)palloc( sizeof(PrsStorage) );
+   items = (LexemEntry*)palloc( sizeof(LexemEntry)*cap );
+
+   /* hand the raw text (without the varlena header) to the parser */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* pull tokens until the parser answers type 0 */
+   for (;;) {
+       type = DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen)));
+       if ( type == 0 )
+           break;
+
+       if ( used >= cap ) {
+           cap = 2*cap;
+           items = (LexemEntry*)repalloc(items, sizeof(LexemEntry)*cap);
+       }
+       items[used].lexem = palloc(llen+1);
+       memcpy( items[used].lexem, lex, llen);
+       items[used].lexem[llen] = '\0';
+       items[used].type = type;
+       used++;
+   }
+
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   st->list = items;
+   st->len = used;
+   st->cur = 0;
+
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(saved);
+}
+
+/*
+ * Produce the next (type, lexeme) row for the parse* functions.
+ * Returns (Datum)0 once all tokens have been returned; the per-query
+ * state is freed at that point.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *st = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *cur;
+   char        *values[2];
+   char        tid[16];
+   HeapTuple   tuple;
+   Datum       result;
+
+   if ( st->cur >= st->len ) {
+       if ( st->list )
+           pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   cur = &(st->list[st->cur]);
+   sprintf(tid, "%d", cur->type);
+   values[0] = tid;
+   values[1] = cur->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   /* the token text is not needed again */
+   pfree(cur->lexem);
+   st->cur++;
+   return result;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+
+/* Set-returning function: tokens of argument 1 as split by the parser with oid argument 0 */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   result = prs_process_call(funcctx);
+
+   if ( result == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+
+/* Set-returning function: tokens of argument 1 as split by the parser named by argument 0 */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+       text *txt = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   result = prs_process_call(funcctx);
+
+   if ( result == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokens of argument 0 as split by the current
+ * parser.  When no parser has been chosen yet, the one named "default"
+ * becomes current.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid ) {
+           current_parser_id = name2id_prs( char2text("default") );
+       }
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   result = prs_process_call(funcctx);
+
+   if ( result == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+
+/*
+ * Build a headline of a text for a query.
+ *
+ * Arguments: 0 = configuration oid, 1 = text, 2 = query, 3 = optional
+ * options text (treated as absent when missing or a NULL pointer).
+ * The text is split by hlparsetext(), the headline function of the
+ * configuration's parser marks the words to show, and genhl() renders
+ * the result.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo   *cfg = findcfg(PG_GETARG_OID(0));
+   text        *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE   *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text        *opt = NULL;
+   WParserInfo *prsobj;
+   HLPRSTEXT   prs;
+   text        *out;
+
+   if ( PG_NARGS()>3 && PG_GETARG_POINTER(3) )
+       opt = PG_GETARG_TEXT_P(3);
+   prsobj = findprs(cfg->prs_id);
+
+   /* start with room for 32 words */
+   memset(&prs, 0, sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+   /* let the parser's headline function choose what to show */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt != NULL )
+       PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+
+/*
+ * headline() with the configuration given by name (argument 0).  The
+ * options argument is forwarded as a NULL pointer when it was not supplied.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text    *cfg = PG_GETARG_TEXT_P(0);
+   Datum   cfgid = ObjectIdGetDatum(name2id_cfg( cfg ) );
+   Datum   optarg = ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL);
+   Datum   out;
+
+   out = DirectFunctionCall4(
+       headline,
+       cfgid,
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       optarg
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+
+/*
+ * headline() with the current configuration.  Arguments: 0 = text,
+ * 1 = query, 2 = optional options text (forwarded as a NULL pointer when
+ * not supplied).
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum   cfgid = ObjectIdGetDatum(get_currcfg());
+   Datum   optarg = ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL);
+   Datum   out;
+
+   out = DirectFunctionCall4(
+       headline,
+       cfgid,
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       optarg
+   );
+   PG_RETURN_DATUM(out);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* Cached description of one row of pg_ts_parser */
+typedef struct {
+   Oid prs_id;                 /* oid of the pg_ts_parser row */
+   FmgrInfo start_info;        /* prs_start function: begins a parse */
+   FmgrInfo getlexeme_info;    /* prs_nexttoken function: yields the next token */
+   FmgrInfo end_info;          /* prs_end function: ends a parse */
+   FmgrInfo headline_info;     /* prs_headline function */
+   Oid lextype;                /* oid of the prs_lextype function */
+   void *prs;                  /* value returned by the start function, passed to the others */
+} WParserInfo;
+
+/* load the description of parser `id` from pg_ts_parser into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* return the cached description of parser `id`, loading it if needed */
+WParserInfo* findprs(Oid id);
+/* translate a parser name into its oid */
+Oid name2id_prs(text *name);
+/* drop every cached parser description */
+void   reset_prs(void);
+
+
+/* One lexeme type as reported by a parser's lextype function */
+typedef struct {
+   int lexid;      /* type code; 0 marks the end of an array of LexDescr */
+   char    *alias; /* short name of the type */
+   char    *descr; /* description of the type */
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * Return a palloc'ed array describing lexeme types 1..LASTNUM of the
+ * default parser, followed by a terminating entry whose lexid is 0.
+ */
+Datum
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr    *descr;
+   int         slot;
+
+   descr = (LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+
+   /* array slot k holds type k+1 */
+   for (slot = 0; slot < LASTNUM; slot++) {
+       int id = slot + 1;
+
+       descr[slot].lexid = id;
+       descr[slot].alias = pstrdup(tok_alias[id]);
+       descr[slot].descr = pstrdup(lex_descr[id]);
+   }
+
+   descr[LASTNUM].lexid = 0;
+
+   PG_RETURN_POINTER(descr);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+
+/*
+ * Start function of the default parser: begin scanning the buffer in
+ * argument 0, whose length is argument 1.  Always returns a NULL pointer.
+ */
+Datum
+prsd_start(PG_FUNCTION_ARGS) {
+   char    *str = (char*)PG_GETARG_POINTER(0);
+   int     len = PG_GETARG_INT32(1);
+
+   start_parse_str( str, len );
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+
+/*
+ * Next-token function of the default parser.  Runs the scanner once,
+ * stores the token pointer and length through arguments 1 and 2, and
+ * returns the scanner's type code.  Argument 0 is not used.
+ */
+Datum
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char    **tokout = (char**)PG_GETARG_POINTER(1);
+   int     *lenout = (int*)PG_GETARG_POINTER(2);
+   int     type;
+
+   type = tsearch2_yylex();
+
+   *tokout = token;
+   *lenout = tokenlen;
+   PG_RETURN_INT32(type);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+
+/* End function of the default parser: finish the current scan.  Argument 0 is not used. */
+Datum
+prsd_end(PG_FUNCTION_ARGS) {
+   end_parse();
+
+   PG_RETURN_VOID();
+}
+
+/*
+ * Classes of lexeme type codes, tested against HLWORD.type in the headline
+ * code below.
+ * NOTE(review): the numbers are presumably the default parser's type codes
+ * from wordparser/deflex.h - confirm before changing any of them.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+/* same test as LEAVETOKEN */
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* types whose words prsd_headline() flags with replace=1 */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* types that prsd_headline() does not count as words */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* types that prsd_headline() avoids as the last token of a headline */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* Window of headline words handed to checkcondition_HL() through TS_execute() */
+typedef struct {
+   HLWORD  *words; /* first word of the window */
+   int len;        /* number of words in the window */
+} hlCheck;
+
+/*
+ * TS_execute() callback: true when some word of the hlCheck window in
+ * `checkval` is attached to the query item `val`.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *win = (hlCheck*)checkval;
+   int     n = win->len;
+   int     k;
+
+   for (k = 0; k < n; k++)
+       if ( win->words[k].item == val )
+           return true;
+
+   return false;
+}
+
+
+/*
+ * Find the next "cover": a range of words [*p, *q] of prs->words, starting
+ * no earlier than the incoming *p, on which the query evaluates to true.
+ *
+ * The right edge *q is the furthest first occurrence, at or after the start
+ * position, of any VAL item of the query.  The left edge *p is the nearest
+ * last occurrence of any VAL item between the start position and *q.  If
+ * the query is not satisfied on that range, the search restarts one word
+ * to the right of *p.
+ *
+ * Returns true with *p and *q set when a cover is found, false otherwise.
+ * A right edge of 0 is treated as "nothing found".
+ *
+ * The bounds of the two forward loops had been damaged in this copy of the
+ * file; they are restored to run over the query items (query->size) and
+ * the parsed words (prs->curwords).
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* right edge: furthest first occurrence of a query value */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q==0 )
+       return false;
+
+   /* left edge: scanning back from *q, nearest occurrence of a query value */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* query not satisfied here: retry from the next word */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+
+/*
+ * Headline function of the default parser.
+ *
+ * Arguments: 0 = HLPRSTEXT with the parsed document, 1 = options text or
+ * NULL, 2 = query.  Recognised option keys (case-insensitive): MaxWords,
+ * MinWords, ShortWord, StartSel, StopSel.  When options are given they must
+ * satisfy MinWords < MaxWords, MinWords > 0 and ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is stretched or trimmed towards the
+ * MinWords..MaxWords range, and the candidate holding the most distinct
+ * query words that does not end on an unsuitable token (NOENDTOKEN, or no
+ * longer than ShortWord) is preferred.  With no cover at all, the first
+ * MinWords words of the document are used.  The chosen words are flagged
+ * in prs->words, and prs->startsel / prs->stopsel and their lengths are
+ * filled in.  Returns prs.
+ *
+ * Restored in this copy, where text between angle brackets had been lost:
+ * three loop/branch conditions, and the default highlight tags, which are
+ * assumed to be <b> and </b>.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults, overridable from opt together with the start and stop tags */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* a better candidate is already known, so try the next cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this candidate if it is the first one, if it holds more query
+        * words and ends well, or if it ends well while the best so far does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       /* no cover at all: fall back to the first min_words words */
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen range */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* NOTE(review): default tags restored from an empty literal - confirm */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+       slen += (*ptr)->len;
+       ptr++;
+   }
+
+   nentry=stat->size + len;
+   slen+=STATSTRSIZE(stat);
+   totallen=CALCSTATSIZE(nentry,slen);
+   newstat=palloc(totallen);
+   newstat->len=totallen;
+   newstat->size=nentry;
+
+   memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+   curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+   ptr=entry;
+   sptr=STATPTR(stat);
+   nptr=STATPTR(newstat);
+
+   if ( len == 1 ) {
+       StatEntry *StopLow = STATPTR(stat);
+       StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+       while (StopLow < StopHigh) {
+           sptr=StopLow + (StopHigh - StopLow) / 2;
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+               StopLow = sptr + 1;
+           else
+               StopHigh = sptr; 
+       }
+       nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+       memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+       nptr->nentry=POSDATALEN(txt,*ptr);
+       if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+       nptr->ndoc=1;
+       nptr->len=(*ptr)->len;
+       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->pos = curptr - STATSTRPTR(newstat);
+       memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+   } else {
+       while( sptr-STATPTR(stat) < stat->size && ptr-entry
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       while(ptr-entry
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+/*
+ * Fold one tsvector (argument 1) into an accumulated statistics value
+ * (argument 0, a tsstat pointer or NULL).
+ *
+ * For every word of the tsvector already present in the statistics, ndoc
+ * is incremented and nentry grows by the word's position count (counted as
+ * 1 when the word has no positions).  Words not yet present are collected
+ * and added by formstat(), which builds a new statistics value.
+ *
+ * Returns the incoming (or freshly initialised) stat when there is nothing
+ * to add, otherwise the new value.  The previous stat is not freed here.
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;    /* words of txt that stat does not contain yet */
+   uint32  len=0, cur=0;           /* allocated / used size of newentry */
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both arrays in parallel, or binary-search stat
+    * for each word when stat is at least 100 times larger than txt.
+    * NOTE(review): both rely on the two arrays being ordered consistently
+    * with compareStatWord() - confirm.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word is missing from stat: remember it */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* stat is exhausted: every remaining word is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           /* the entry array ends where the string area begins */
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* the bounds only meet when the loop ended without a match */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/* Per-query state of the set-returning functions that emit statistics rows */
+typedef struct {
+   uint32  cur;        /* index of the next entry to return */
+   /*
+    * Private copy of the statistics made by ts_setup_firstcall().
+    * NOTE(review): declared as tsvector* although the data copied into it
+    * is a tsstat - confirm the two types share the len/size header.
+    */
+   tsvector *stat;
+} StatStorage;
+
+/*
+ * First-call setup for the statistics set-returning functions: copy `stat`
+ * into the multi-call context and prepare tuple building for the
+ * "statinfo" relation type.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   StatStorage     *st;
+   TupleDesc       tupdesc;
+
+   st = palloc( sizeof(StatStorage) );
+   st->stat = palloc( stat->len );
+   memcpy(st->stat, stat, stat->len);
+   st->cur = 0;
+   funcctx->user_fctx = (void*)st;
+
+   tupdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+
+/*
+ * Produce the next (word, ndoc, nentry) row from the stored statistics.
+ * Returns (Datum)0 once every entry has been returned; the per-query
+ * state is freed at that point.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *st = (StatStorage*)funcctx->user_fctx;
+   StatEntry   *entry;
+   char        *values[3];
+   char        ndoc[16];
+   char        nentry[16];
+   char        *word;
+   HeapTuple   tuple;
+   Datum       result;
+
+   if ( st->cur >= st->stat->size ) {
+       pfree(st->stat);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = STATPTR(st->stat) + st->cur;
+
+   /* the word lives in the string area without a terminator: copy and terminate it */
+   word = palloc( entry->len+1 );
+   memcpy( word, STATSTRPTR(st->stat)+entry->pos, entry->len);
+   word[entry->len] = '\0';
+
+   sprintf(ndoc, "%d", entry->ndoc);
+   sprintf(nentry, "%d", entry->nentry);
+
+   values[0] = word;
+   values[1] = ndoc;
+   values[2] = nentry;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(word);
+   st->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+
+/* Set-returning function: one row per entry of the tsstat passed as argument 0 */
+Datum
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   result = ts_process_call(funcctx);
+
+   if ( result == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+/* oid of the tsvector type, looked up once by get_ti_Oid() */
+static Oid tiOid=InvalidOid;
+
+/*
+ * Look up the oid of the tsvector type in pg_type and store it in tiOid.
+ * Must be called with an SPI connection open.  Raises ERROR when the query
+ * fails, returns no row, or yields InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * SPI_processed is an unsigned row count, so a "< 0" test can never be
+    * true; test for zero rows so vals[0] below is never read from an
+    * empty result.
+    */
+   if ( SPI_processed==0 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL query in `txt` and accumulate word statistics over its
+ * result.
+ *
+ * The query must return exactly one column, of type tsvector.  Rows are
+ * read through a cursor 100 at a time and every non-null value is folded
+ * into the running statistics with ts_accum().  Must be called with an SPI
+ * connection open.  Returns the accumulated statistics (empty when the
+ * query returns no rows).
+ *
+ * The header of the per-row loop had been damaged in this copy of the
+ * file; it is restored to iterate over the SPI_processed rows of each batch.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from empty statistics */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum hands back a new value only when words were added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: word statistics over the tsvector column
+ * produced by the SQL query given as argument 0.  The query is run, and
+ * all statistics are gathered, on the first call.
+ */
+Datum
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *txt = PG_GETARG_TEXT_P(0);
+       tsstat  *stat;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0);
+       /* copy the result into the multi-call context before leaving SPI */
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   result = ts_process_call(funcctx);
+
+   if ( result == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, result);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One accumulated word.  ndoc/nentry correspond to the columns of the SQL
+ * type statinfo(word, ndoc, nentry).
+ * NOTE(review): len/pos look like the length and offset of the word inside
+ * the string area located by STATSTRPTR() - confirm in ts_stat.c.
+ */
+typedef struct {
+   uint32  len;
+   uint32  pos;
+   uint32  ndoc;   
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Accumulator layout, as implied by the macros below:
+ *   len, size            header of STATHDRSIZE bytes
+ *   StatEntry[size]      located by STATPTR()
+ *   string area          located by STATSTRPTR(), STATSTRSIZE() bytes long
+ * len is the total size in bytes, size the number of StatEntry items.
+ */
+typedef struct {
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsstat;
+
+/*
+ * Macro arguments are parenthesized so that expressions can be passed
+ * safely, and the accessors cast to tsstat (the type they operate on)
+ * rather than to tsvector, which this header does not declare.
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN; -- everything below runs in one transaction, closed by END on the script's last line
+
+-- Dictionary registry: one row per dictionary; rows are added by the INSERTs further down.
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid, -- pg_proc oid of the dictionary's init function (nullable)
+   dict_initoption text, -- NULL or a stop-word file path in the rows this script inserts; presumably handed to dict_init
+   dict_lexize oid not null, -- pg_proc oid of the dictionary's lexize function
+   dict_comment    text
+) with oids;
+
+-- Dictionary interface: lexize() overloads and selection of the current dictionary.
+CREATE FUNCTION lexize(oid, text) -- dictionary identified by oid
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text) -- first argument is presumably pg_ts_dict.dict_name (C symbol lexize_byname)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text) -- presumably uses the dictionary chosen with set_curdict() (C symbol lexize_bycurrent)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int) -- NOTE(review): takes int while lexize() takes oid for the same kind of id; confirm intended
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text) -- by-name variant (C symbol set_curdict_byname)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+-- Built-in dictionaries: each gets an init/lexize function pair and one pg_ts_dict row.
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select -- NOTE(review): the proname lookups here and below ignore schema and signature; a second function of the same name makes the scalar subselect fail
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4) -- shared by the English and Russian stemmer rows below
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop', -- DATA_PATH is replaced with $(datadir) by the Makefile's sed rule
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop', -- DATA_PATH is replaced with $(datadir) by the Makefile's sed rule
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null, -- no init option: the row is a template, see the example UPDATE at the end of the script
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null, -- no init option: see the example UPDATE at the end of the script
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+-- Parser registry: one row per parser; every prs_* oid column is filled with a pg_proc oid by the INSERT below.
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+-- SQL-level parser interface: token type listing, current-parser selection, and parse().
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4) -- parser identified by a numeric id
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text) -- by-name variant (C symbol token_type_byname)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type() -- presumably uses the parser chosen with set_curprs() (C symbol token_type_current)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text) -- by-name variant (C symbol set_curprs_byname)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text) -- parser identified by oid, second argument is the text to parse
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text) -- by-name variant (C symbol parse_byname)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text) -- presumably uses the current parser (C symbol parse_current)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+-- Default parser: five C callbacks plus the pg_ts_parser row that registers them.
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select -- values follow the table's column order: name, start, nexttoken, end, headline, lextype, comment
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+-- tsearch configurations: each names a parser; pg_ts_cfgmap assigns dictionaries per token alias.
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null, -- the rows inserted below all use 'default', the parser registered above
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[], -- the rows loaded by the COPY below hold pg_ts_dict names such as {en_stem}
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text) -- by-name variant (C symbol set_curcfg_byname)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default'); -- no third value, so locale is NULL for this configuration
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+-- tsvector type: I/O functions, the type itself, and functions/operators that build or combine tsvectors.
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1, -- variable-length type
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector) -- C symbol tsvector_length
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text) -- by-name variant (C symbol to_tsvector_name)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text) -- NOTE(review): marked iscachable although the symbol name suggests it depends on the current configuration; confirm
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || ( -- operator form of concat(tsvector,tsvector)
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+-- tsquery type: I/O functions, the type itself, and to_tsquery() constructors.
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1, -- variable-length type
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery) -- C symbol tsquerytree
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text) -- by-name variant (C symbol to_tsquery_name)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text) -- NOTE(review): marked iscachable although the symbol name suggests it depends on the current configuration; confirm
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+-- Match operator @@, defined in both argument orders; each definition names the other as its commutator.
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector) -- same test with the arguments reversed
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ ( -- tsvector @@ tsquery
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ ( -- tsquery @@ tsvector
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+-- Trigger function; takes no declared arguments and is implemented by C symbol tsearch2.
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- Relevance ranking (rank, rank_cd) and headline generation.
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery) -- shares C symbol rank_def with the 3-argument form below
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery) -- shares C symbol rank_cd_def with the 3-argument form below
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text) -- shares C symbol headline with the 3-argument form below
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text) -- by-name variants (C symbol headline_byname)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text) -- NOTE(review): iscachable, yet the symbol name suggests dependence on the current configuration; confirm
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+-- GiST index support for tsvector
+-- GiST key type: gtsvector is what the index stores (STORAGE clause of the operator class below)
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1, -- variable-length type
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- GiST support functions; the operator class below binds them to support numbers 1-7
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- Default GiST operator class for tsvector columns
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+-- Word statistics: stat(text) returns one statinfo row per word.
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text) -- argument is the text of a query; ts_stat.c requires it to return exactly one tsvector column
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+-- reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+-- get covers (debugging aid for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of position lexem in string and it's length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>            /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in); /* input function of the tsvector type */
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out); /* output function of the tsvector type */
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector); /* SQL: to_tsvector(oid, text) */
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current); /* SQL: to_tsvector(text) */
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name); /* SQL: to_tsvector(text, text) */
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2); /* SQL: trigger function tsearch2() */
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length); /* SQL: length(tsvector) */
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator: orders WordEntryPos items by ascending position.
+ *
+ * Returns 0 for equal positions.  The previous version returned 1 in
+ * that case, so cmp(a,b) and cmp(b,a) were both positive for duplicates,
+ * which violates the ordering contract qsort() relies on.
+ */
+static int 
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *pa = (const WordEntryPos *) a;
+   const WordEntryPos *pb = (const WordEntryPos *) b;
+
+   if ( pa->pos == pb->pos )
+       return 0;
+   return ( pa->pos > pb->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and remove duplicates in place.
+ *
+ * When the same position occurs several times, one item is kept and it
+ * gets the largest weight seen among the duplicates.  Compaction stops
+ * early once MAXNUMPOS items have been kept or a kept position equals
+ * MAXENTRYPOS-1.
+ *
+ * Returns the number of items kept at the front of a[].  An empty
+ * (or negative) count yields 0; previously l == 0 reported one item
+ * and a negative l was passed on to qsort().
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *ptr, *res;
+
+   res=a;
+   if (l<=1)
+       return (l>0) ? l : 0;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   ptr = a + 1;
+   while (ptr - a < l) {
+       if ( ptr->pos != res->pos ) {
+           /* new position: keep it in the next output slot */
+           res++;
+           res->pos = ptr->pos;
+           res->weight = ptr->weight;
+           if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
+               break;
+       } else if ( ptr->weight > res->weight )
+           /* duplicate position: remember the heavier weight */
+           res->weight = ptr->weight;
+       ptr++;
+   }
+   return res + 1 - a;
+}
+
+/*
+ * String buffer that entry.pos offsets index into while compareentry()
+ * runs; uniqueentry() sets it right before calling qsort().
+ */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN: shorter lexemes sort first,
+ * lexemes of equal length are ordered by strncmp() over that length.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *lhs = (const WordEntryIN *) a;
+   const WordEntryIN *rhs = (const WordEntryIN *) b;
+
+   if ( lhs->entry.len != rhs->entry.len )
+       return ( lhs->entry.len > rhs->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[lhs->entry.pos],
+                  &BufferStr[rhs->entry.pos],
+                  lhs->entry.len);
+}
+
+/*
+ * Sort the l entries in a[] and merge entries with the same lexeme.
+ *
+ * a         - entries; entry.pos/entry.len locate each lexeme inside buf.
+ *             For entries with haspos set, pos[0] holds the number of
+ *             positions (read through a uint16 cast) and pos[1..] the
+ *             positions themselves.
+ * l         - number of entries in a[]
+ * buf       - buffer holding the lexeme strings
+ * outbuflen - output only: bytes needed for the data area of the final
+ *             value, i.e. for every surviving entry SHORTALIGN(len) plus,
+ *             when it has positions, (count + 1) WordEntryPos slots.
+ *             This is exactly what tsvector_in() copies per entry.
+ *
+ * Returns the number of entries kept at the front of a[].
+ *
+ * Fixes over the previous version:
+ *  - the size is computed from zero instead of being added on top of
+ *    whatever *outbuflen held on entry (the caller passes its scratch
+ *    buffer capacity there);
+ *  - the multi-entry path now counts the slot that stores the position
+ *    count, which the copy loop in tsvector_in() writes as well;
+ *  - a single entry without positions now sets *outbuflen too;
+ *  - l <= 0 is handled without touching a[0];
+ *  - an entry is no longer memcpy()'d onto itself.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+   int4        need = 0;
+
+   if (l <= 0) {
+       *outbuflen = 0;
+       return 0;
+   }
+
+   res = a;
+   if (l == 1) {
+       if ( a->entry.haspos ) {
+           *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
+           need += (*(uint16*)(a->pos) + 1) * sizeof(WordEntryPos);
+       }
+       *outbuflen = need + SHORTALIGN(a->entry.len);
+       return l;
+   }
+
+   ptr = a + 1;
+   BufferStr = buf;    /* compareentry() reads the strings through this */
+   qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* different lexeme: finish the current entry, then start a new one */
+           if ( res->entry.haspos ) {
+               *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+               need += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+           }
+           need += SHORTALIGN(res->entry.len);
+           res++;
+           if ( res != ptr )
+               memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* same lexeme: fold its positions into the current entry */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               /* current entry had no positions yet: adopt the duplicate's array */
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+       ptr++;
+   }
+
+   /* finish the last entry */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       need += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   need += SHORTALIGN(res->entry.len);
+
+   *outbuflen = need;
+   return res + 1 - a;
+}
+
+#define WAITWORD   1 /* before a lexeme: spaces are skipped */
+#define WAITENDWORD 2 /* inside an unquoted lexeme */
+#define WAITNEXTCHAR   3 /* just after a backslash: the next character is taken literally */
+#define WAITENDCMPLX   4 /* inside a quoted lexeme, waiting for the closing quote */
+#define WAITPOSINFO    5 /* after the closing quote: ':' starts position info, anything else ends the token */
+#define INPOSINFO  6 /* a digit starting a position is required */
+#define WAITPOSDELIM   7 /* after a position started: further digits, a weight letter, ',' or end of token */
+
+#define RESIZEPRSBUF /* doubles state->word when fewer than two free bytes remain, keeping curpos at the same offset */ \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Read the next lexeme, with optional position info, from state->prsbuf.
+ *
+ * The lexeme is collected into state->word (NUL-terminated, with
+ * state->curpos pointing at the terminator) and state->prsbuf is advanced
+ * past the consumed input.  A lexeme is either a bare word or enclosed in
+ * single quotes; a backslash makes the following character literal.
+ *
+ * Unless state->oprisdelim is set, a ':' after the lexeme introduces a
+ * comma-separated list of positions, each optionally followed by a weight
+ * letter a/b/c/d (or '*', treated like 'a').  In that case state->pos is
+ * allocated, slot 0 holds the number of positions (through a uint16 cast)
+ * and state->alen the allocated slot count; state->alen stays 0 when no
+ * position info was read.
+ *
+ * With state->oprisdelim set, operator characters (ISOPERATOR) and ':'
+ * end the lexeme instead.
+ *
+ * Returns 1 when a lexeme was read, 0 at end of input; malformed input
+ * raises elog(ERROR).
+ *
+ * The <ctype.h> classifiers are called with an unsigned char cast:
+ * passing a plain char with a negative value (any byte >= 0x80 where char
+ * is signed) is undefined behaviour.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               /* resume the state that saw the backslash */
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               /* the delimiter is left unconsumed for the next call */
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   /* consume the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi() reads the whole number here; WAITPOSDELIM skips its remaining digits */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function of the tsvector type: parse the textual form into the
+ * internal representation.
+ *
+ * Phase 1 reads tokens with gettoken_tsvector(), appending every lexeme
+ * to a scratch string buffer (tmpbuf) and recording offset, length and
+ * position array in arr[].
+ * Phase 2 lets uniqueentry() sort and merge the entries; it also reports
+ * the number of data bytes the final value needs.
+ * Phase 3 allocates the result and copies, per entry, the lexeme
+ * (padded to SHORTALIGN) followed by its position data if any.
+ *
+ * Raises elog(ERROR) for lexemes of MAXSTRLEN bytes or more and when the
+ * accumulated string data exceeds MAXSTRPOS.
+ *
+ * Fix: for input without any lexeme the data size is now 0.  Previously
+ * buflen still held the scratch buffer capacity at that point, so an
+ * empty tsvector carried that many unused bytes.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the entry array when it is full */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       /* grow the scratch string buffer until the new lexeme fits */
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       if (cur - tmpbuf > MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           /* take over the position array allocated by the tokenizer */
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   /* from here on buflen means "bytes of data in the result" */
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       /* entry.pos switches from an offset in tmpbuf to an offset in the result */
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* count slot plus the positions themselves */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * length(tsvector): returns the size field of the value, i.e. the number
+ * of entries it holds.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before the detoasted copy may be released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * tsvector_out: produce the text form of a tsvector.
+ *
+ * Each entry is written as a single-quoted word, entries are separated by
+ * one space.  A quote inside a word is preceded by a backslash.  When an
+ * entry carries positions they follow as ":pos[weight],pos[weight],...",
+ * where weight 3/2/1 is printed as A/B/C and weight 0 prints nothing.
+ * The result is a palloc'd, NUL-terminated string.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+       /*
+        * Upper bound for the output size: two quotes per entry, a space
+        * between entries, the terminator, twice the word length, and 7
+        * bytes per position (pos is a 14-bit field, so at most 5 digits,
+        * plus a weight letter and a comma; the comma slot of the last
+        * position leaves room for the ':').
+        */
+       lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+       for (i = 0; i < out->size; i++) {
+               lenbuf += ptr[i].len*2 /*for escape */;
+               if ( ptr[i].haspos )
+                       lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+       }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'')
+           {
+               /* grow by one byte for the backslash; repalloc may move the buffer */
+               int4        pos = curout - outbuf;
+
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       /* POSDATALEN is 0 for entries without positions */
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort comparator for WORD entries.
+ *
+ * Orders by word length first, then by the word bytes, then by position.
+ * Two entries with the same word and the same position compare equal (0);
+ * the comparator must be consistent (cmp(a,b) == -cmp(b,a)) for qsort to
+ * behave predictably.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int         res;
+
+   if (wa->len != wb->len)
+       return (wa->len > wb->len) ? 1 : -1;
+
+   res = strncmp(wa->word, wb->word, wb->len);
+   if (res != 0)
+       return res;
+
+   /* same word: order by position, reporting equality honestly */
+   if (wa->pos.pos == wb->pos.pos)
+       return 0;
+   return (wa->pos.pos > wb->pos.pos) ? 1 : -1;
+}
+
+/*
+ * Sort the word array and squeeze out duplicate words in place.
+ *
+ * Every surviving entry gets a palloc'd position array in pos.apos:
+ * apos[0] holds the number of positions, apos[1..] the positions
+ * themselves (clamped with LIMITPOS).  The word string of each dropped
+ * duplicate is pfree'd.  Returns the number of unique entries.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *uniq = a;       /* last unique entry written so far */
+   WORD       *scan;
+   int         savedpos;
+
+   /* a single element needs no sorting */
+   if (l != 1)
+       qsort((void *) a, l, sizeof(WORD), compareWORD);
+
+   /*
+    * pos.pos is read before pos.apos is stored, as in every place below;
+    * presumably the two members share storage -- keep that order.
+    */
+   savedpos = LIMITPOS(a->pos.pos);
+   a->alen = 2;
+   a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
+   a->pos.apos[0] = 1;
+   a->pos.apos[1] = savedpos;
+
+   for (scan = a + 1; scan - a < l; scan++)
+   {
+       if (scan->len == uniq->len &&
+           strncmp(scan->word, uniq->word, uniq->len) == 0)
+       {
+           /* duplicate word: drop its string, maybe remember its position */
+           int         npos;
+
+           pfree(scan->word);
+           npos = uniq->pos.apos[0];
+           if (npos >= MAXNUMPOS - 1 ||
+               uniq->pos.apos[npos] == MAXENTRYPOS - 1)
+               continue;
+
+           if (npos + 1 >= uniq->alen)
+           {
+               uniq->alen *= 2;
+               uniq->pos.apos = (uint16 *) repalloc(uniq->pos.apos,
+                                                    sizeof(uint16) * uniq->alen);
+           }
+           uniq->pos.apos[npos + 1] = LIMITPOS(scan->pos.pos);
+           uniq->pos.apos[0]++;
+       }
+       else
+       {
+           /* new word: move it into the next output slot */
+           uniq++;
+           uniq->len = scan->len;
+           uniq->word = scan->word;
+           savedpos = LIMITPOS(scan->pos.pos);
+           uniq->alen = 2;
+           uniq->pos.apos = (uint16 *) palloc(sizeof(uint16) * uniq->alen);
+           uniq->pos.apos[0] = 1;
+           uniq->pos.apos[1] = savedpos;
+       }
+   }
+
+   return uniq + 1 - a;
+}
+
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are first sorted and de-duplicated by uniqueWORD().  The
+ * result holds one WordEntry per word followed by the data area: each
+ * word's bytes (padded with SHORTALIGN) and, when the word has positions,
+ * a uint16 count followed by that many WordEntryPos items (weight 0).
+ * All word strings, position arrays and prs->words itself are freed.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,
+               j,
+               datalen = 0,
+               totallen;
+   tsvector   *vec;
+   WordEntry  *entry;
+   char       *base,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+   /* first pass: size of the data area */
+   for (i = 0; i < prs->curwords; i++)
+   {
+       WORD       *w = &(prs->words[i]);
+
+       datalen += SHORTALIGN(w->len);
+       if (w->alen)
+           datalen += sizeof(uint16) + w->pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, datalen);
+   vec = (tsvector *) palloc(totallen);
+   memset(vec, 0, totallen);
+   vec->len = totallen;
+   vec->size = prs->curwords;
+
+   /* second pass: fill entries and data area */
+   entry = ARRPTR(vec);
+   base = cur = STRPTR(vec);
+   for (i = 0; i < prs->curwords; i++, entry++)
+   {
+       WORD       *w = &(prs->words[i]);
+       WordEntryPos *wptr;
+       uint16      npos;
+
+       entry->len = w->len;
+       if (cur - base > MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       entry->pos = cur - base;
+       memcpy((void *) cur, (void *) w->word, w->len);
+       pfree(w->word);
+       cur += SHORTALIGN(w->len);
+
+       if (!w->alen)
+       {
+           entry->haspos = 0;
+           continue;
+       }
+
+       /* position block: count, then the positions with default weight */
+       entry->haspos = 1;
+       npos = w->pos.apos[0];
+       *(uint16 *) cur = npos;
+       wptr = POSDATAPTR(vec, entry);
+       for (j = 0; j < npos; j++)
+       {
+           wptr[j].weight = 0;
+           wptr[j].pos = w->pos.apos[j + 1];
+       }
+       cur += sizeof(uint16) + npos * sizeof(WordEntryPos);
+       pfree(w->pos.apos);
+   }
+
+   pfree(prs->words);
+   return vec;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text): parse the text with the configuration whose
+ * id is argument 0 and return the resulting tsvector.  Text that yields no
+ * words produces an empty tsvector (size 0).
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *in = PG_GETARG_TEXT_P(1);
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+   tsvector   *out = NULL;
+   PRSTEXT     prs;
+
+   /* start with room for 32 words */
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+   PG_FREE_IF_COPY(in, 1);
+
+   if (!prs.curwords)
+   {
+       /* nothing indexable: hand back a header-only value */
+       pfree(prs.words);
+       out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+   }
+   else
+       out = makevalue(&prs);  /* makevalue releases prs.words */
+
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * to_tsvector(cfg_name, text): resolve the configuration name with
+ * name2id_cfg() and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS)
+{
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector,
+                                cfgid,
+                                PG_GETARG_DATUM(1),
+                                (Datum) 0);
+
+   PG_FREE_IF_COPY(cfgname, 0);
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * to_tsvector(text): delegate to to_tsvector() using the configuration id
+ * returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS)
+{
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       result;
+
+   result = DirectFunctionCall3(to_tsvector,
+                                cfgid,
+                                PG_GETARG_DATUM(0),
+                                (Datum) 0);
+
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * TEXTOID.  Returns the oid of the first such candidate, or InvalidOid
+ * when there is none.  The whole candidate list is freed.
+ */
+static Oid
+findFunc(char *fname)
+{
+   List       *names = makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(names, 1);
+   Oid         funcid = InvalidOid;
+
+   freeList(names);
+
+   while (cand != NULL)
+   {
+       FuncCandidateList next = cand->next;
+
+       /* keep only the first match, but keep walking to free the rest */
+       if (funcid == InvalidOid && cand->args[0] == TEXTOID)
+           funcid = cand->oid;
+
+       pfree(cand);
+       cand = next;
+   }
+
+   return funcid;
+}
+
+/*
+ * Trigger
+ *
+ * tsearch2(tsvector_field, text_field1, ...) is a row-level BEFORE
+ * INSERT/UPDATE trigger.  The first trigger argument names the column that
+ * receives the tsvector.  Each further argument is either a column of
+ * type text/varchar/char whose content is parsed, or, when no such column
+ * exists, the name of a one-argument text function (see findFunc) that is
+ * applied to the values of the columns processed after it.
+ * The configuration returned by get_currcfg() is used for parsing.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   /* sanity checks on how the trigger was fired */
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* first argument: the destination column */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user function first */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * When detoasting made no copy, alias txt_toasted to txt so the
+            * pfree below is skipped for the function's own result.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free only a copy made by detoasting */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store an empty tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One lexeme of a tsvector: where its bytes live in the data area, how
+ * long it is, and whether a position block follows it.
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* callers reject lengths >= MAXSTRLEN, i.e. anything not fitting 11 bits */
+#define MAXSTRLEN ( 1<<11 )
+/*
+ * Largest offset that fits the 20-bit pos field.  Callers test
+ * "offset > MAXSTRPOS", so this must be the largest valid value, not one
+ * past it; otherwise an offset of exactly 1<<20 would wrap to 0.
+ */
+#define MAXSTRPOS ( (1<<20) - 1 )
+
+/* One occurrence of a lexeme: word position and weight (0..3). */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+/* clamp a position to the largest value representable in 14 bits */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * On-disk / in-memory layout: header, then size WordEntry items, then the
+ * data area holding the lexeme bytes and optional position blocks.
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/* All macro arguments are parenthesized so expressions can be passed safely. */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* start of the position block of entry e: a uint16 count, then the positions */
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/* Entry plus separately allocated positions, used while reading input. */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/* State of the tsvector text-input tokenizer (see gettoken_tsvector). */
+typedef struct
+{
+   char       *prsbuf;
+   char       *word;
+   char       *curpos;
+   int4        len;
+   int4        state;
+   int4        alen;
+   WordEntryPos    *pos;
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+/* strip: copy a tsvector without its position data */
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+/* setweight: assign one weight to every position of a tsvector */
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+/* concat: merge two tsvectors, shifting the positions of the second */
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the input that keeps every lexeme but
+ * none of the position blocks (haspos is cleared on every entry).
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* data area of the result: just the aligned lexeme strings */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       /* offsets are relative to the result's own data area */
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char"): return a copy of the input in which every
+ * position carries the given weight.  The weight letter is matched
+ * case-insensitively: a=3, b=2, c=1, d=0; anything else raises an error.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /*
+    * tolower() is only defined for values representable as unsigned char
+    * (or EOF); a plain char may be negative, so cast first.
+    */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   /* work on a private copy; the layout does not change, only weights */
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two lexemes given their entries and the data areas they point
+ * into: shorter lexemes sort first, equal lengths are ordered by strncmp.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+   if ( a->len != b->len )
+       return ( a->len > b->len ) ? 1 : -1;
+
+   return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position block
+ * of destptr (an entry of dest), adding maxpos to each position and
+ * clamping with LIMITPOS.  Weights are copied unchanged.
+ *
+ * Copying stops once the destination holds MAXNUMPOS positions or its
+ * last position is already the clamp value MAXENTRYPOS-1.  Sets
+ * destptr->haspos when at least one position was added and returns the
+ * number of positions added.
+ *
+ * NOTE(review): the loop header was truncated in this copy of the source
+ * ("for(i=0; i"); the condition below is reconstructed from the limits
+ * used in this file -- confirm against the upstream file.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   /* no block yet: start the count at zero */
+   if ( ! destptr->haspos )
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight;
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1;
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector): merge two tsvectors into one.
+ *
+ * Both inputs are walked in compareEntry() order.  Lexemes present in only
+ * one input are copied; lexemes present in both are stored once with the
+ * positions of in1 followed by those of in2.  Every position taken from
+ * in2 is shifted by the largest position found in in1 (see add_pos).
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position used in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /*
+    * Allocate for the worst case (no common lexemes).  The buffer is
+    * zeroed, and size is provisionally the sum so that STRPTR(out) leaves
+    * room for all entries; both fields are corrected at the end.
+    */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* positions of in1 are copied verbatim, count included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* positions of in2 are shifted by maxpos while being copied */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same lexeme in both inputs: store once, join the positions */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count word already exists, so only positions are added */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* leftovers of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* leftovers of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Fix up the real entry count and total length.  With fewer entries
+    * than provisionally assumed, STRPTR(out) now points earlier than
+    * data, so the data area is moved down to follow the entry array.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+-- Uninstall script for the tsearch2 module.
+-- Everything runs in one transaction, so a failing statement leaves the
+-- database unchanged.
+BEGIN;
+
+--Be careful !!!
+--script drops all indices, triggers and columns with types defined
+--in tsearch2.sql
+
+
+-- GiST operator class (CASCADE also removes objects depending on it)
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+-- aggregate
+DROP AGGREGATE stat(tsvector);
+
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- data types; CASCADE removes whatever still depends on them
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- functions that are dropped explicitly
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+-- the trigger function; CASCADE also drops the triggers that use it
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of each token type.  The table has one slot
+ * per code 0..LASTNUM; it appears to be indexed by the token codes from
+ * deflex.h, with slot 0 unused (empty string).
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of each token type, in the same order as lex_descr.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/* remember: LASTNUM must stay equal to the highest token code below */
+#define LASTNUM        23
+
+/* token type codes returned by the scanner */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+/* description and alias tables, defined in deflex.c */
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Text of the token most recently returned by tsearch2_yylex().
+ * The variable is defined (with an initializer) in parser.l, so it is only
+ * declared here; a plain definition in the header would create a second
+ * definition in every file that includes it.
+ */
+extern char *token;
+/*
+ * Length of that token.
+ * NOTE(review): no initialized definition of tokenlen is visible in
+ * parser.l, so this is left as a tentative definition -- consider moving
+ * the definition into parser.l and declaring it extern here as well.
+ */
+int            tokenlen;
+/* scanner entry point; returns a token code from deflex.h */
+int            tsearch2_yylex(void);
+/* begin scanning a string / a file handle; limit bounds the bytes read */
+void       start_parse_str(char *, int);
+void       start_parse_fh(FILE *, int);
+/* release the scanner buffer */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+/*
+ * Flex scanner that splits text into the token types listed in deflex.h.
+ */
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/* Avoid exit() on fatal scanner errors */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* postgres allocation function */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to token */
+char *s     = NULL;  /* to return WHOLE hyphenated-word */
+
+YY_BUFFER_STATE buf = NULL; /* buffer being parsed; needed when parsing from a string */
+
+int lrlimit = -1;  /* for limiting read from filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* for limiting read from filehandle */
+
+/*
+ * Redefine the input macro so that reads from a file handle stop after
+ * lrlimit bytes: each call reads at most min(lrlimit, max_size) bytes and
+ * decrements lrlimit; lrlimit == 0 reports end of input.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char: every byte with the high bit set counts as a letter */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+   /*
+    * HTML handling.  The start-condition prefixes and the patterns for
+    * the closing script tag, the comment delimiters and the closing tag
+    * had been lost from this copy; they are restored here.  All of these
+    * states are exclusive (%x), so the unprefixed rules further down do
+    * not apply inside a tag, comment or script.
+    */
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
+   /* the whole script element is reported as one blank */
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0';
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+"<!--" { BEGIN INCOMMENT; }
+
+<INCOMMENT>"-->"   {
+   /* the whole comment is reported as one blank */
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0';
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"</"[[:alpha:]]    { BEGIN INTAG; }
+
+<INTAG>"\""    { BEGIN QINTAG; }
+
+<QINTAG>"\\\"" ;
+
+<QINTAG>"\""   { BEGIN INTAG; }
+
+<INTAG>">" {
+   /* end of tag: report it as a single-character TAG token */
+   BEGIN INITIAL;
+   *tsearch2_yytext=' ';
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   /* named HTML entity */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   /* numeric HTML entity, one to three digits */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   /* three or more dot-separated numbers */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   /* scheme prefix: what follows is scanned in the URL state */
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   /* ftp is reported with the same HTTP token code */
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+<URL,INITIAL>{HOSTNAME}[/:]{URI} {
+   /*
+    * Whole URL: return a private copy of it, then push the text back and
+    * rescan it in the SERVER state to emit host and path separately.
+    * NOTE(review): the start-condition prefixes in this group of rules
+    * were lost from this copy and are reconstructed from the BEGIN
+    * transitions -- confirm against the upstream file.
+    */
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; }
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return FURL;
+}
+
+<SERVER,URL,INITIAL>{HOSTNAME} {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+<SERVER>[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   /* return the whole word, then rescan its parts in the DELIM state */
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; }
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; }
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; }
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+<DELIM>[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+<DELIM>\+?[0-9]+\.[0-9]+ {
+   /* no leading '-' here: inside a hyphenated word it is the delimiter */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+<DELIM>{CYRALPHA}+  /* one word in composite-word */   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD;
+}
+
+<DELIM>[[:alpha:]]+  /* one word in composite-word */  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD;
+}
+
+<DELIM>{ALNUM}+  /* one word in composite-word */  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD;
+}
+
+<DELIM>-  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+<DELIM,SERVER,URL>.|\n /* return in basic state */ {
+   /*
+    * Anything else ends the special state; the character is pushed back
+    * and rescanned in INITIAL.  This rule must not be active in INITIAL
+    * itself, where it would push back and rescan forever.
+    */
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   /* only bytes with the high bit set */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   /* mix of digits, ASCII letters and high-bit bytes */
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   /* any other single character is reported as a blank */
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: free the saved copy of a composite token, if
+ * any, and delete the current scanner buffer.
+ */
+void end_parse() {
+   if (s != NULL) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/*
+ * Begin scanning the first limit bytes of str.  A buffer left over from a
+ * previous parse is released first.
+ */
+void start_parse_str(char* str, int limit) {
+   if (buf != NULL)
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Begin scanning from a file handle.  A non-zero limit caps the number of
+ * bytes read (see YY_INPUT); zero means no cap.  A buffer left over from a
+ * previous parse is released first.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if (buf != NULL)
+       end_parse();
+
+   if (limit)
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of parser `id`, read from pg_ts_parser
+ * through SPI.
+ *
+ * The lookup plan is prepared once and kept in plan_getparser.  The FmgrInfo
+ * entries for the start / nexttoken / end / headline functions are set up in
+ * TopMemoryContext; the lextype function is stored as a bare oid.
+ *
+ * Reports ERROR through ts_error when the plan cannot be prepared, when the
+ * query fails, or when no row has this oid.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       /* columns 1..5 follow the select list above */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else {
+       /* Oid is unsigned, so it is printed with %u rather than %d */
+       ts_error(ERROR, "No parser with id %u", id);
+   }
+   SPI_finish();
+}
+
+/*
+ * Process-wide cache of loaded parsers, used by findprs(), name2id_prs()
+ * and reset_prs().
+ */
+typedef struct {
+   WParserInfo *last_prs;  /* entry returned by the most recent findprs() */
+   int     len;            /* number of entries of list[] in use */
+   int     reallen;        /* number of entries allocated for list[] */
+   WParserInfo *list;      /* realloc'ed array, kept sorted by prs_id */
+   SNMap       name2id_map; /* parser name -> oid cache */
+} PrsList;
+
+/* the single cache instance; starts out empty */
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Forget every cached parser: release the name->id map and the parser
+ * array, then zero the whole PList structure.
+ */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+   if ( PList.list != NULL ) {
+       free(PList.list);
+   }
+   memset(&PList, 0, sizeof(PrsList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly rather than subtracted: prs_id is an
+ * unsigned Oid, so a difference that does not fit in an int would give a
+ * result with the wrong sign and an inconsistent ordering.
+ *
+ * Returns -1, 0 or 1.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   if ( ida > idb )
+       return 1;
+   return 0;
+}
+
+/*
+ * Return the cached WParserInfo for parser `id`, loading it through
+ * init_prs() the first time the id is seen.
+ *
+ * Lookup order: the entry returned last time, then a binary search of the
+ * sorted cache, then a fresh load appended to the cache.
+ */
+WParserInfo *
+findprs(Oid id) {
+   WParserInfo *slot;
+
+   /* same parser as on the previous call? */
+   if ( PList.last_prs != NULL && PList.last_prs->prs_id == id )
+       return PList.last_prs;
+
+   /* search the parsers loaded so far */
+   if ( PList.len != 0 ) {
+       WParserInfo probe;
+
+       probe.prs_id = id;
+       PList.last_prs = bsearch(&probe, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* not cached yet: make room, doubling the array (16 entries to start) */
+   if ( PList.len == PList.reallen ) {
+       int newcap = ( PList.reallen != 0 ) ? PList.reallen * 2 : 16;
+       WParserInfo *grown = (WParserInfo*)realloc(PList.list, sizeof(WParserInfo) * newcap);
+
+       if ( grown == NULL )
+           ts_error(ERROR,"No memory");
+       PList.list = grown;
+       PList.reallen = newcap;
+   }
+
+   /* load the parser into the first free slot */
+   slot = &(PList.list[PList.len]);
+   PList.last_prs = slot;
+   init_prs(id, slot);
+   PList.len++;
+
+   /*
+    * Re-sort so bsearch keeps working.  Sorting may move the new entry,
+    * so it is looked up again instead of returning `slot`.
+    */
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+   return findprs(id);
+}
+
+/* saved SPI plan for the name -> oid query; prepared on first use */
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into its pg_ts_parser oid.
+ *
+ * The name->id map in PList is consulted first; on a miss the oid is read
+ * through SPI and added to the map.  Reports ERROR through ts_error when the
+ * plan cannot be prepared, the query fails, or no parser has that name.
+ */
+Oid
+name2id_prs(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum params[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid id;
+
+   id = findSNMap_t( &(PList.name2id_map), name );
+   if ( id )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, params, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( !( SPI_processed > 0 ) )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+   SPI_finish();
+
+   /* remember the answer for the next lookup */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* Cross-call state of the token_type() set-returning functions. */
+typedef struct {
+   int     cur;        /* index in list[] of the next row to return */
+   LexDescr    *list;  /* array returned by the parser's lextype function; scanning stops at an entry with lexid 0 */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type() family: look up the parser,
+ * call its lextype function and keep the returned LexDescr array in a
+ * TypeStorage hung off funcctx->user_fctx.  The slot and attinmeta are
+ * built for the "tokentype" relation.  State is allocated in the SRF's
+ * multi-call memory context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo *parser;
+   TypeStorage *state;
+   TupleDesc desc;
+   MemoryContext saved;
+   Datum lexlist;
+
+   parser = findprs(prsid);
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   state->cur = 0;
+   lexlist = OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) );
+   state->list = (LexDescr*)DatumGetPointer(lexlist);
+   funcctx->user_fctx = (void*)state;
+
+   desc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+/*
+ * Produce the next (id, alias, description) row of a token_type() call.
+ *
+ * Returns the row as a Datum, or (Datum)0 once the entry with lexid 0 is
+ * reached (or there is no list); in that case the stored state is freed.
+ * The alias and descr strings of a row are freed after the tuple is built.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *st = (TypeStorage*)funcctx->user_fctx;
+   LexDescr *entry;
+   char *values[3];
+   char txtid[16];
+   HeapTuple tuple;
+   Datum result;
+
+   /* end of list: release the state and signal completion */
+   if ( st->list == NULL || st->list[st->cur].lexid == 0 ) {
+       if ( st->list != NULL )
+           pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = &(st->list[st->cur]);
+   sprintf(txtid, "%d", entry->lexid);
+   values[0] = txtid;
+   values[1] = entry->alias;
+   values[2] = entry->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(entry->alias);
+   pfree(entry->descr);
+   st->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: lists the token types of the parser whose oid is
+ * argument 0, one row per call.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) { 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* process_call() yields (Datum)0 when the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+/*
+ * Same as token_type(), but the parser is given by name (argument 0) and
+ * resolved with name2id_prs().
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* process_call() yields (Datum)0 when the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+/*
+ * Same as token_type(), but for the current parser.  When no current parser
+ * has been set yet, the parser named "default" becomes the current one.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* process_call() yields (Datum)0 when the list is exhausted */
+   if (  (result=process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+/*
+ * Make the parser with oid argument 0 the current parser.  findprs() is
+ * called first, so an unknown oid raises an error before the current
+ * parser id is changed.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        findprs(PG_GETARG_OID(0));
+        current_parser_id=PG_GETARG_OID(0);
+        PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+/*
+ * Make the parser named by argument 0 the current parser: the name is
+ * resolved with name2id_prs() and the work is delegated to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *name=PG_GETARG_TEXT_P(0);
+    
+        DirectFunctionCall1(
+                set_curprs,
+                ObjectIdGetDatum( name2id_prs(name) )
+        );
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+/* One token produced by a parser run, as collected by prs_setup_firstcall(). */
+typedef struct {
+   int type;       /* token type returned by the parser's nexttoken function */
+   char    *lexem; /* palloc'ed, NUL-terminated copy of the token text */
+} LexemEntry;
+
+/* Cross-call state of the parse() set-returning functions. */
+typedef struct {
+   int cur;            /* while collecting: entries filled; while returning: next entry to emit */
+   int len;            /* while collecting: allocated entries; while returning: total entries */
+   LexemEntry  *list;  /* collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse() family.
+ *
+ * Runs parser `prsid` over the whole of `txt` (start, nexttoken until it
+ * returns 0, end) and stores a copy of every token in a PrsStorage hung off
+ * funcctx->user_fctx.  The slot and attinmeta are built for the "tokenout"
+ * relation.  Allocations are made in the SRF's multi-call memory context.
+ *
+ * prsid is declared as Oid: every caller passes an Oid and it is handed
+ * straight to findprs(Oid).
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text payload (header excluded) */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* the parser reports each token through lex/llen; type 0 ends the run */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       if ( st->cur>=st->len ) {
+           /* list is full: double it */
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* keep a NUL-terminated private copy of the token */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* switch from "collecting" to "returning": len = count, cur = next row */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Produce the next (type, lexeme) row of a parse() call.
+ *
+ * Returns the row as a Datum, or (Datum)0 once every collected token has
+ * been emitted; in that case the stored state is freed.  The lexeme string
+ * of a row is freed after the tuple is built.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage *st = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry *entry;
+   char *values[2];
+   char tid[16];
+   HeapTuple tuple;
+   Datum result;
+
+   /* nothing left: release the state and signal completion */
+   if ( st->cur >= st->len ) {
+       if ( st->list != NULL )
+           pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = &(st->list[st->cur]);
+   sprintf(tid, "%d", entry->type);
+   values[0] = tid;
+   values[1] = entry->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(entry->lexem);
+   st->cur++;
+   return result;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: tokenizes the text in argument 1 with the parser
+ * whose oid is argument 0 and returns one token per call.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* the whole text is parsed here; later calls only emit rows */
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* prs_process_call() yields (Datum)0 when all tokens were returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+/*
+ * Same as parse(), but the parser is given by name (argument 0) and
+ * resolved with name2id_prs(); the text is argument 1.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0); 
+       text *txt = PG_GETARG_TEXT_P(1); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* prs_process_call() yields (Datum)0 when all tokens were returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+/*
+ * Same as parse(), but uses the current parser on the text in argument 0.
+ * When no current parser has been set yet, the parser named "default"
+ * becomes the current one.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0); 
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id==InvalidOid ) 
+           current_parser_id = name2id_prs( char2text("default") );
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   /* prs_process_call() yields (Datum)0 when all tokens were returned */
+   if (  (result=prs_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * headline(cfg oid, text, query [, options text]) -> text
+ *
+ * Parses the text under configuration `cfg` with hlparsetext(), lets the
+ * configuration's parser choose the fragment through its headline function,
+ * and assembles the result with genhl().
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   /* options are optional: absent argument or NULL pointer means none */
+   text       *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
+   HLPRSTEXT   prs;
+   text *out;
+   WParserInfo *prsobj = findprs(cfg->prs_id);
+
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32; /* initial size of the words array */
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+
+   /* the parser's headline function marks the words to show in prs */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   /* NOTE(review): relies on the headline function having set startsel/stopsel to palloc'ed strings - confirm for non-default parsers */
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+/*
+ * headline() variant taking the configuration by name (argument 0); the
+ * name is resolved with name2id_cfg() and the remaining arguments are
+ * forwarded to headline().
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg=PG_GETARG_TEXT_P(0);
+
+   Datum out=DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       /* options are optional; a NULL pointer stands for "none" */
+       ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);   
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+/*
+ * headline() variant using the configuration returned by get_currcfg();
+ * arguments are (text, query [, options]).
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   PG_RETURN_DATUM(DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       /* options are optional; a NULL pointer stands for "none" */
+       ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
+   ));
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* Cached description of one word parser (one row of pg_ts_parser). */
+typedef struct {
+   Oid prs_id;                 /* oid of the pg_ts_parser row */
+   FmgrInfo start_info;        /* prs_start function */
+   FmgrInfo getlexeme_info;    /* prs_nexttoken function */
+   FmgrInfo end_info;          /* prs_end function */
+   FmgrInfo headline_info;     /* prs_headline function */
+   Oid lextype;                /* oid of the prs_lextype function */
+   void *prs;                  /* value returned by the start function; passed back to nexttoken/end */
+} WParserInfo;
+
+/* load the description of parser `id` into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* cached lookup of a parser by oid */
+WParserInfo* findprs(Oid id);
+/* cached lookup of a parser oid by name */
+Oid name2id_prs(text *name);
+/* drop all cached parser information */
+void   reset_prs(void);
+
+
+/*
+ * Description of one token type, as returned in an array by a parser's
+ * lextype function; the array ends with an entry whose lexid is 0.
+ */
+typedef struct {
+   int lexid;      /* token type id; 0 marks the end of the array */
+   char    *alias; /* short name of the token type */
+   char    *descr; /* description of the token type */
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * lextype function of the default parser: returns a palloc'ed LexDescr
+ * array describing token types 1..LASTNUM (alias from tok_alias[], text from
+ * lex_descr[]), followed by a terminating entry with lexid 0.
+ */
+Datum
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *table;
+   int slot;
+
+   table = (LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+
+   /* slot k describes token type k+1 */
+   for ( slot = 0; slot < LASTNUM; slot++ ) {
+       int lexid = slot + 1;
+
+       table[slot].lexid = lexid;
+       table[slot].alias = pstrdup(tok_alias[lexid]);
+       table[slot].descr = pstrdup(lex_descr[lexid]);
+   }
+
+   /* terminator */
+   table[LASTNUM].lexid = 0;
+
+   PG_RETURN_POINTER(table);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+/*
+ * start function of the default parser: points the scanner at the buffer in
+ * argument 0 of length argument 1.  No parser-state object is returned
+ * (the result is a NULL pointer).
+ */
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   start_parse_str( (char*)PG_GETARG_POINTER(0), PG_GETARG_INT32(1) );
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+/*
+ * nexttoken function of the default parser: runs the scanner once and
+ * returns the token type.  The token text and length are reported through
+ * the pointers in arguments 1 and 2, taken from the variables `token` and
+ * `tokenlen`.  Argument 0 (parser state) is unused.
+ */
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   char **t=(char**)PG_GETARG_POINTER(1); 
+   int *tlen=(int*)PG_GETARG_POINTER(2);
+   int  type=tsearch2_yylex();
+
+   *t = token;
+   *tlen = tokenlen;
+   PG_RETURN_INT32(type);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+/*
+ * end function of the default parser: releases the scanner buffers via
+ * end_parse().  Argument 0 (parser state) is unused.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse();
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class predicates over numeric token type ids.
+ * NOTE(review): the numbers presumably correspond to the token ids declared
+ * in wordparser/deflex.h - confirm before changing either side.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* token types that prsd_headline marks with `replace` */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* token types prsd_headline does not count as words */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* token types prsd_headline avoids ending a headline on */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* Window of headline words handed to checkcondition_HL() through TS_execute(). */
+typedef struct {
+   HLWORD  *words; /* first word of the window */
+   int len;        /* number of words in the window */
+} hlCheck;
+
+/*
+ * TS_execute() callback: true when some word in the hlCheck window passed
+ * as `checkval` is linked to the query operand `val`.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window = (hlCheck*)checkval;
+   int idx = 0;
+
+   while ( idx < window->len ) {
+       if ( window->words[idx].item == val )
+           return true;
+       idx++;
+   }
+   return false;
+}
+
+
+/*
+ * Find the next span of words ("cover") at or after position *p that
+ * satisfies the query.
+ *
+ * On entry *p is the position to start from.  On success the function
+ * returns true with *p and *q set to the first and last word index of the
+ * cover.  It returns false when no operand of the query occurs at or after
+ * the start position, or no satisfying span is found.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* q = the largest, over all operands, of the operand's first occurrence at or after pos */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   if ( *q==0 )
+       return false;
+
+   /* p = the smallest, over all operands, of the operand's last occurrence in [pos, q] */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           /* the span does not satisfy the query: retry one word further on */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+/*
+ * headline function of the default parser.
+ *
+ * Arguments: the HLPRSTEXT to mark up, an optional options text (NULL for
+ * none) and the query.  Recognized options are MaxWords, MinWords,
+ * ShortWord, StartSel and StopSel.
+ *
+ * Every cover found by hlCover() is sized in words, then extended or
+ * shortened towards the MinWords..MaxWords range, and the best candidate is
+ * kept.  When no cover exists the headline is the beginning of the text.
+ * The words of the chosen range are flagged (in / selected / skip /
+ * replace), and startsel/stopsel with their lengths are filled in.
+ *
+ * Returns prs.  Raises ERROR for inconsistent option values.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults; overridden from opt, as are the start and stop tags */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* a better headline was already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Keep this candidate when it is the first one, when it holds more
+        * query words and ends on an acceptable token, or when it ends on an
+        * acceptable token while the current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   /* no cover at all: take the first min_words words of the text */
+   if ( bestlen<0 ) {
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen range */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* default highlight tags when the options did not supply any */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+           if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+               memcpy(nptr, sptr, sizeof(StatEntry));
+               sptr++;
+           } else {
+               nptr->nentry=POSDATALEN(txt,*ptr);
+               if ( nptr->nentry==0 )
+                   nptr->nentry=1; 
+               nptr->ndoc=1;
+               nptr->len=(*ptr)->len;
+               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+               nptr->pos = curptr - STATSTRPTR(newstat);
+               curptr += nptr->len;
+               ptr++;
+           }
+           nptr++;
+       }
+
+       memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); 
+       
+       while(ptr-entry
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+/*
+ * Accumulate the words of one tsvector (argument 1) into the running
+ * statistics (argument 0, a tsstat or NULL).
+ *
+ * Words already present in the statistics get ndoc incremented by one and
+ * nentry by their number of positions (at least 1).  Words not yet present
+ * are collected in newentry[] and merged in by formstat().  Returns the
+ * statistics pointer: the input one when nothing new was added, otherwise
+ * the structure built by formstat().
+ *
+ * NOTE(review): argument 1 is detoasted before the NULL checks below are
+ * reached - confirm the function is only ever called with a non-NULL
+ * tsvector.
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;
+   uint32  len=0, cur=0;
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /* walk both arrays in parallel unless stat is at least 100x larger than txt */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               sptr++;
+           } else if ( cmp==0 ) {
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word is missing from stat: remember it */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* stat is exhausted: every remaining word is new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       /* binary search of stat for each word of txt */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Cross-call state of the statistics set-returning functions.
+ * NOTE(review): `stat` is declared tsvector* but ts_setup_firstcall() stores
+ * a copy of a tsstat in it; the code only reads its size field and uses the
+ * STAT* macros on it - confirm the two layouts agree on len/size.
+ */
+typedef struct {
+   uint32  cur;        /* index of the next StatEntry to return */
+   tsvector *stat;     /* private copy of the statistics */
+} StatStorage;
+
+/*
+ * First-call setup for the statistics set-returning functions: copies
+ * `stat` (stat->len bytes) into the SRF's multi-call memory context, stores
+ * it with a zeroed cursor in funcctx->user_fctx, and builds the slot and
+ * attinmeta for the "statinfo" relation.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext saved;
+   StatStorage *state;
+   TupleDesc desc;
+
+   saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   state = palloc( sizeof(StatStorage) );
+   state->cur = 0;
+   state->stat = palloc( stat->len );
+   memcpy(state->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)state;
+
+   desc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(desc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+   MemoryContextSwitchTo(saved);
+}
+
+
+/*
+ * Produce the next (word, ndoc, nentry) row of a statistics call.
+ *
+ * Returns the row as a Datum, or (Datum)0 once every entry has been
+ * emitted; in that case the stored copy of the statistics and the state
+ * itself are freed.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *st = (StatStorage*)funcctx->user_fctx;
+   StatEntry *entry;
+   char *values[3];
+   char ndoc[16];
+   char nentry[16];
+   HeapTuple tuple;
+   Datum result;
+
+   /* nothing left: release the state and signal completion */
+   if ( !( st->cur < st->stat->size ) ) {
+       pfree(st->stat);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = STATPTR(st->stat) + st->cur;
+
+   sprintf(ndoc, "%d", entry->ndoc);
+   values[1] = ndoc;
+   sprintf(nentry, "%d", entry->nentry);
+   values[2] = nentry;
+
+   /* the word is stored without a terminator: make a C string of it */
+   values[0] = palloc( entry->len+1 );
+   memcpy( values[0], STATSTRPTR(st->stat)+entry->pos, entry->len);
+   (values[0])[entry->len] = '\0';
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[0]);
+   st->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: returns the entries of the tsstat passed as
+ * argument 0, one row per call.
+ */
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* ts_process_call() yields (Datum)0 when all entries were returned */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+/* oid of the tsvector type; looked up on first use by get_ti_Oid() */
+static Oid tiOid=InvalidOid;
+/*
+ * Look up the oid of the type named 'tsvector' in pg_type and store it in
+ * tiOid.  Uses SPI_exec directly, without connecting to SPI itself.
+ *
+ * Raises ERROR when the query fails, returns no row, or yields InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /* no row means the type is missing; row 0 must not be read in that case */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL query given in `txt` and accumulate word statistics over its
+ * result.
+ *
+ * The query must return exactly one column of type tsvector.  Rows are
+ * fetched through a cursor in batches of 100, and every non-NULL value is
+ * fed to ts_accum().  SPI_exec/SPI_prepare are used without connecting to
+ * SPI here.
+ *
+ * Returns the accumulated tsstat (an empty one when there are no rows).
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from empty statistics */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       /* accumulate every row of the current batch */
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum may hand back a new structure: drop the old one */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: runs the SQL query given as text in argument 0,
+ * gathers word statistics over its tsvector column with ts_stat_sql(), and
+ * returns one statistics row per call.
+ */
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       /* the statistics are copied into the multi-call context before SPI is closed */
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* ts_process_call() yields (Datum)0 when all entries were returned */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+


diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a


--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One per-word record stored in a tsstat value.
+ */
+typedef struct {
+   uint32  len;    /* presumably the byte length of the word -- TODO confirm */
+   uint32  pos;    /* presumably the word's offset in the string area -- TODO confirm */
+   uint32  ndoc;
+   uint32  nentry;
+}  StatEntry;
+
+/*
+ * Variable-length statistics value: a fixed header followed by `size`
+ * StatEntry records and then the string area (see the macros below).
+ */
+typedef struct {
+   int4        len;    /* total length of the value in bytes, header included */
+   int4        size;   /* number of StatEntry records */
+   char        data[1];
+}  tsstat;
+
+/* Size of the fixed header (the len and size fields). */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* Bytes needed for x entries plus lenstr bytes of string data. */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* Pointer to the first StatEntry of the tsstat x. */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* Pointer to the string area that follows the entry array of the tsstat x. */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* Number of bytes in the string area of the tsstat x. */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif


diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8


--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;


diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d


--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * a string of values, plus an array giving each lexeme's position in that string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>     /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending pos.
+ * Returns 0 for equal positions so that the ordering is consistent
+ * (cmp(a,b) and cmp(b,a) must never both be positive).
+ */
+static int 
+comparePos(const void *a, const void *b) {
+   const WordEntryPos *left = (const WordEntryPos *) a;
+   const WordEntryPos *right = (const WordEntryPos *) b;
+
+   if ( left->pos == right->pos )
+       return 0;
+   return ( left->pos > right->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ *
+ * When the same position occurs more than once the highest weight is kept.
+ * Compaction stops early once the result holds MAXNUMPOS items or a
+ * position equal to MAXENTRYPOS-1 has been stored.
+ *
+ * Returns the number of positions left at the front of a[]; an array of
+ * zero or one element is returned unchanged.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *ptr, *res;
+
+   res=a;
+   /* nothing to sort or merge; also keeps an empty array from reporting 1 item */
+   if (l<=1)
+       return l;
+
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   ptr = a + 1;
+   while (ptr - a < l) {
+       if ( ptr->pos != res->pos ) {
+           /* new position: append it to the compacted prefix */
+           res++;
+           res->pos = ptr->pos;
+           res->weight = ptr->weight;
+           if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
+               break;
+       } else if ( ptr->weight > res->weight )
+           /* duplicate position: remember the strongest weight */
+           res->weight = ptr->weight;
+       ptr++;
+   }
+   return res + 1 - a;
+}
+
+/* Text buffer that compareentry() indexes into through entry.pos. */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN items: shorter words sort first,
+ * words of equal length are ordered by strncmp() over their bytes in
+ * BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *lhs = (const WordEntryIN *) a;
+   const WordEntryIN *rhs = (const WordEntryIN *) b;
+
+   if ( lhs->entry.len != rhs->entry.len )
+       return ( lhs->entry.len > rhs->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[lhs->entry.pos],
+                  &BufferStr[rhs->entry.pos],
+                  lhs->entry.len);
+}
+
+/*
+ * Finish one output entry: de-duplicate its position list (if any) and
+ * return the number of data-area bytes the entry occupies, i.e. the
+ * short-aligned word plus the count slot and the positions themselves.
+ */
+static int4
+entrydatasize(WordEntryIN * e)
+{
+   int4        size = SHORTALIGN(e->entry.len);
+
+   if ( e->entry.haspos ) {
+       *(uint16*)(e->pos) = uniquePos( &(e->pos[1]), *(uint16*)(e->pos));
+       /* +1: the first slot of the array holds the position count */
+       size += (*(uint16*)(e->pos) + 1) * sizeof(WordEntryPos);
+   }
+   return size;
+}
+
+/*
+ * Sort the l parsed entries in a[] and merge entries for the same word,
+ * concatenating their position lists.
+ *
+ * buf is the text buffer the entries' pos/len fields refer to.
+ * On return *outbuflen holds the exact number of data-area bytes needed to
+ * store the surviving entries (words short-aligned, each position list
+ * including its count slot); the incoming value of *outbuflen is ignored.
+ *
+ * Returns the number of unique entries left at the front of a[].
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+   int4        need = 0;
+
+   if (l <= 0) {
+       *outbuflen = 0;
+       return 0;
+   }
+
+   res = a;
+   if (l > 1) {
+       BufferStr = buf;
+       qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+   }
+
+   for (ptr = a + 1; ptr - a < l; ptr++)
+   {
+       if (!(ptr->entry.len == res->entry.len &&
+             strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+       {
+           /* a different word starts: close the current one and move on */
+           need += entrydatasize(res);
+           res++;
+           if ( res != ptr )
+               memcpy(res,ptr,sizeof(WordEntryIN));
+       } else if ( ptr->entry.haspos ){
+           /* same word again: fold its positions into the kept entry */
+           if ( res->entry.haspos ) {
+               int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+               res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+               memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), 
+                   &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+               *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+               pfree( ptr->pos );
+           } else {
+               res->entry.haspos=1;
+               res->pos = ptr->pos;
+           }
+       }
+   }
+   need += entrydatasize(res);
+   *outbuflen = need;
+
+   return res + 1 - a;
+}
+
+#define WAITWORD   1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO    5
+#define INPOSINFO  6
+#define WAITPOSDELIM   7
+
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Read the next word (and optional position list) from state->prsbuf.
+ *
+ * The word is collected into state->word and NUL-terminated; state->curpos
+ * is left pointing at the terminator.  When a ":pos,pos..." list follows the
+ * word (and state->oprisdelim is not set) the positions are stored in
+ * state->pos, whose first slot is used as a uint16 counter, and state->alen
+ * is non-zero.  With state->oprisdelim set, operator characters and ':'
+ * terminate a word.
+ *
+ * Returns 1 when a word was read, 0 when the input is exhausted.
+ * Raises ERROR on malformed input.
+ *
+ * All <ctype.h> calls get their argument cast to unsigned char, because
+ * passing a negative plain char to them is undefined.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       if (state->state == WAITWORD)
+       {
+           if (*(state->prsbuf) == '\0')
+               return 0;
+           else if (*(state->prsbuf) == '\'')
+               state->state = WAITENDCMPLX;
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+               elog(ERROR, "Syntax error");
+           else if (*(state->prsbuf) != ' ')
+           {
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           /* the character after a backslash is taken literally */
+           if (*(state->prsbuf) == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+           {
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1; 
+           } else if ( *(state->prsbuf) == ':' ) {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           /* inside a quoted word: wait for the closing quote */
+           if (*(state->prsbuf) == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim ) {
+                   state->prsbuf++;
+                   return 1;
+               } else
+                   state->state = WAITPOSINFO;
+           }
+           else if (*(state->prsbuf) == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (*(state->prsbuf) == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = *(state->prsbuf);
+               state->curpos++;
+           }
+       } else if (state->state == WAITPOSINFO) {
+           if ( *(state->prsbuf) == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       } else if (state->state == INPOSINFO) {
+           if ( isdigit((unsigned char) *(state->prsbuf)) ) {
+               if ( state->alen==0 ) {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               } else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
+                   state->alen *= 2; 
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           } else
+               elog(ERROR,"Syntax error");
+       } else if (state->state == WAITPOSDELIM) {
+           /* remaining digits of the number are skipped here; atoi() already consumed them */
+           if ( *(state->prsbuf) == ',' ) {
+               state->state = INPOSINFO;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 3;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'b' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 2;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'c' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 1;
+           } else if ( tolower((unsigned char) *(state->prsbuf)) == 'd' ) {
+               if ( state->pos[ *(uint16*)(state->pos) ].weight )
+                   elog(ERROR,"Syntax error");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+           } else if ( isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
+               return 1;
+           } else if ( !isdigit((unsigned char) *(state->prsbuf)) )
+               elog(ERROR,"Syntax error");
+       } else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * tsvector_in - input function of the tsvector type.
+ *
+ * Tokenizes the cstring argument with gettoken_tsvector(), collecting the
+ * words in a temporary text buffer and their descriptors in a WordEntryIN
+ * array, removes duplicate words with uniqueentry() and then lays out the
+ * result as a tsvector: the WordEntry array followed by the data area
+ * holding each word (short-aligned) and, when present, its position list.
+ *
+ * Raises ERROR when a word reaches MAXSTRLEN bytes or the collected text
+ * exceeds MAXSTRPOS.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* grow the descriptor array and the text buffer as needed */
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       arr[len].entry.len= state.curpos - state.word;
+       if (cur - tmpbuf > MAXSTRPOS)
+           elog(ERROR, "Too long value");
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+       if ( state.alen ) {
+           /* the entry takes over the position array built by the tokenizer */
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;     /* no words: do not carry the scratch buffer size into the value */
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       /* from here on entry.pos is relative to the value's own data area */
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos ); 
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * tsvector_length - return the size field (number of entries) of the
+ * tsvector argument.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector       *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Text output function for tsvector.
+ *
+ * Every entry is written as a single-quoted word; entries are separated by
+ * one space.  An entry that carries position data is followed by ':' and a
+ * comma-separated list of positions, each optionally suffixed with a weight
+ * letter (3 -> 'A', 2 -> 'B', 1 -> 'C', 0 -> no letter).  A single quote
+ * inside a word is written with a preceding backslash.
+ *
+ * Returns a palloc'd, NUL-terminated string.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf = 0, pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+       /*
+        * Size the output buffer: two quotes per entry, one separator between
+        * entries, the terminator, twice each word's length (room for an
+        * escape per byte) and 7 bytes per stored position (presumably up to
+        * five digits, a weight letter and a comma).
+        */
+       lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
+       for (i = 0; i < out->size; i++) {
+               lenbuf += ptr[i].len*2 /*for escape */;
+               if ( ptr[i].haspos )
+                       lenbuf += 7*POSDATALEN(out, &(ptr[i]));
+       }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       /* ptr walks the entry array in step with i (see ptr++ below) */
+       curin = STRPTR(out)+ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           if (*curin == '\'')
+           {
+               int4        pos = curout - outbuf;
+
+               /*
+                * Grow the buffer by one byte for the backslash; curout must
+                * be recomputed because repalloc may move the buffer.
+                * NOTE(review): lenbuf already reserves two bytes per word
+                * byte, so this growth looks redundant - confirm.
+                * NOTE(review): only the quote is escaped here, a backslash
+                * in the word is copied as is - confirm the input function
+                * agrees with that.
+                */
+               outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+               curout = outbuf + pos;
+               *curout++ = '\\';
+           }
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       /* pp = number of positions stored for this entry (0 if none) */
+       if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
+           WordEntryPos *wptr;
+           *curout++ = ':';
+           wptr=POSDATAPTR(out,ptr);
+           while(pp) {
+               sprintf(curout,"%d",wptr->pos);
+               /* advance to the NUL that sprintf just wrote */
+               curout=strchr(curout,'\0');
+               switch( wptr->weight ) {
+                   case 3:   *curout++ = 'A'; break;
+                   case 2:   *curout++ = 'B'; break;
+                   case 1:   *curout++ = 'C'; break;
+                   case 0: 
+                   default: break;
+               }
+               /* comma between positions, none after the last one */
+               if ( pp>1 )     *curout++ = ',';
+               pp--; wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout='\0';
+   /* also terminate at the very end of the allocated buffer */
+   outbuf[lenbuf - 1] = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort comparator for WORD items: shorter words sort first, words of equal
+ * length are ordered by strncmp over that length, and identical words are
+ * ordered by their position (never reported as equal).
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   WORD       *wa = (WORD *) a;
+   WORD       *wb = (WORD *) b;
+   int         cmp;
+
+   /* different lengths decide immediately */
+   if (wa->len != wb->len)
+       return (wa->len > wb->len) ? 1 : -1;
+
+   cmp = strncmp(wa->word, wb->word, wb->len);
+   if (cmp != 0)
+       return cmp;
+
+   /* same text: fall back to the position of the word */
+   return (wa->pos.pos > wb->pos.pos) ? 1 : -1;
+}
+
+/*
+ * Sort the l words in a[] and collapse duplicates in place.
+ *
+ * For every distinct word that survives, pos.apos is set to a palloc'd
+ * uint16 array whose element [0] is the number of positions and whose
+ * elements [1..] are the (LIMITPOS-clamped) positions; alen is the allocated
+ * length of that array.  The text of each dropped duplicate is pfree'd.
+ *
+ * Returns the number of distinct words now at the front of a[].
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+   int tmppos;
+
+   /* a single word needs no sorting, only its position array */
+   if (l == 1) {
+       /*
+        * pos.pos is read before pos.apos is assigned - presumably the two
+        * members share storage (TODO confirm against the WORD definition).
+        */
+       tmppos=LIMITPOS(a->pos.pos);
+       a->alen=2;
+       a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+       a->pos.apos[0]=1;
+       a->pos.apos[1]=tmppos;
+       return l;
+   }
+
+   /* res = last kept word, ptr = next word to examine */
+   res = a;
+   ptr = a + 1;
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+   /* the first word after sorting is always kept */
+   tmppos=LIMITPOS(a->pos.pos);
+   a->alen=2;
+   a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+   a->pos.apos[0]=1;
+   a->pos.apos[1]=tmppos;
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* new distinct word: move it into the next kept slot */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           tmppos=LIMITPOS(ptr->pos.pos);
+           res->alen=2;
+           res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+           res->pos.apos[0]=1;
+           res->pos.apos[1]=tmppos;
+       } else {
+           /* duplicate of *res: drop its text, keep only its position */
+           pfree(ptr->word);
+           /*
+            * Append only while fewer than MAXNUMPOS-1 positions are stored
+            * and the last stored position is not already the clamp value
+            * MAXENTRYPOS-1.
+            */
+           if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+               /* double the array when the next slot would not fit */
+               if ( res->pos.apos[0]+1 >= res->alen ) {
+                   res->alen*=2;
+                   res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+               }
+               res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
+               res->pos.apos[0]++; 
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * Builds a palloc'd tsvector from the words collected in prs.  The words are
+ * first sorted and de-duplicated by uniqueWORD(); then each word's text is
+ * copied into the string area (padded with SHORTALIGN), followed, when the
+ * word has positions, by a uint16 count and that many WordEntryPos items
+ * with weight 0.
+ *
+ * The word texts, the position arrays and prs->words itself are pfree'd.
+ * Raises ERROR "Value is too big" when a word's offset does not fit into
+ * WordEntry.pos.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+   /* first pass: size of the string area (aligned text + position data) */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   /* zero everything so that alignment padding is deterministic */
+   memset(in,0,totallen);  
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   /* second pass: fill the entry array and the string area */
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /*
+        * WordEntry.pos is a 20-bit field, so the largest offset it can hold
+        * is MAXSTRPOS-1; an offset equal to MAXSTRPOS would silently wrap
+        * to 0, hence ">=" rather than ">".
+        */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+           
+           ptr->haspos=1;
+           /* position count first, then the positions themselves */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               /* apos[0] is the count, so positions start at apos[1] */
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text): parse the text with the given configuration
+ * and return the resulting tsvector; text that yields no words gives an
+ * empty tsvector.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *src = PG_GETARG_TEXT_P(1);
+   PRSTEXT     prs;
+   tsvector       *result = NULL;
+   TSCfgInfo *cfg=findcfg(PG_GETARG_INT32(0)); 
+
+   /* start with room for 32 words */
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(src), VARSIZE(src) - VARHDRSZ);
+   PG_FREE_IF_COPY(src, 1);
+
+   if (prs.curwords == 0) {
+       /* nothing was collected: hand back a header-only vector */
+       pfree(prs.words);
+       result = palloc(CALCDATASIZE(0,0));
+       result->len = CALCDATASIZE(0,0);
+       result->size = 0;
+   } else {
+       /* makevalue() takes care of releasing prs.words */
+       result = makevalue(&prs);
+   }
+
+   PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg_name, text): resolve the configuration name to its id
+ * and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS) {
+   text       *cfgname = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum( name2id_cfg( cfgname ) );
+   Datum       vector;
+
+   vector = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(1), (Datum)0);
+
+   PG_FREE_IF_COPY(cfgname,0);
+   PG_RETURN_DATUM(vector);
+}
+
+/*
+ * to_tsvector(text): delegate to to_tsvector() using the id returned by
+ * get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS) {
+   Datum       cfgid = Int32GetDatum( get_currcfg() );
+   Datum       vector;
+
+   vector = DirectFunctionCall3(to_tsvector, cfgid, PG_GETARG_DATUM(0), (Datum)0);
+
+   PG_RETURN_DATUM(vector);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * text.  Returns the oid of the first such candidate, or InvalidOid when
+ * there is none.  The whole candidate list is pfree'd.
+ */
+static Oid
+findFunc(char *fname) {
+   List       *qualname = makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(qualname, 1);
+   FuncCandidateList next;
+   Oid         found = InvalidOid;
+
+   freeList(qualname);
+
+   for (; cand != NULL; cand = next) {
+       /* remember the successor before this node is released */
+       next = cand->next;
+       if ( found == InvalidOid && cand->args[0] == TEXTOID )
+           found = cand->oid;
+       pfree(cand);
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * Row-level BEFORE INSERT/UPDATE trigger that fills a tsvector column.
+ * Trigger arguments: the name of the tsvector column, followed by one or
+ * more names.  Each further name is either a column of the relation (its
+ * text is parsed and added to the vector) or, when no such column exists,
+ * the name of a one-argument text function; once a function has been found
+ * it is applied to the value of every column processed after it.
+ *
+ * Returns the modified tuple.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   /* NOTE(review): the configuration is looked up before the trigger checks below */
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid;
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   /* pick the tuple that is about to be stored */
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   /* first argument names the tsvector column to fill */
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* find all words in indexable column */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           /* not a column: treat the argument as a function name */
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue;
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           /* run the column value through the user-supplied function */
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           /*
+            * When detoasting returned the function result itself, point
+            * txt_toasted at it so the pfree() test below leaves it alone.
+            */
+           if ( txt == txttmp )
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       /* free txt only when it is a separate detoasted copy */
+       if (txt != (text*)DatumGetPointer(txt_toasted) )
+           pfree(txt);
+   }
+
+   /* make tsvector value */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store an empty (header-only) tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       /* NOTE(review): unlike the branch above, "out" is not pfree'd here */
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}


diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b


--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One entry of the tsvector entry array, packed into 32 bits:
+ *   haspos - set when position data is stored for the word
+ *   len    - length of the word in bytes (11 bits)
+ *   pos    - offset of the word inside the string area (20 bits)
+ * MAXSTRLEN and MAXSTRPOS are one more than the largest value the len and
+ * pos fields can hold.
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One stored position of a word, packed into 16 bits:
+ *   weight - 2-bit weight value
+ *   pos    - 14-bit position value
+ * LIMITPOS clamps a position to MAXENTRYPOS-1, the largest value the pos
+ * field can hold.  MAXNUMPOS is used by the code as the limit on how many
+ * positions are kept per word.
+ */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * In-memory layout of a tsvector value:
+ *   len  - total size of the value in bytes
+ *   size - number of WordEntry items
+ *   data - the WordEntry array, followed by the string area
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Layout helpers for tsvector values.  All macro arguments are
+ * parenthesized so that expression arguments (e.g. "a + b") are evaluated
+ * as a unit.
+ *
+ *   DATAHDRSIZE       size of the two int4 header fields (len, size)
+ *   CALCDATASIZE      total size for x entries and lenstr bytes of string area
+ *   ARRPTR            start of the WordEntry array
+ *   STRPTR            start of the string area (right after the array)
+ *   STRSIZE           size of the string area
+ *   _POSDATAPTR       position data of entry e: located after its
+ *                     SHORTALIGN'ed text; begins with a uint16 count
+ *   POSDATALEN        number of positions of entry e (0 when haspos is unset)
+ *   POSDATAPTR        first WordEntryPos of entry e (just past the count)
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/*
+ * Working form of an entry while a tsvector is being built from text: the
+ * packed entry plus a pointer to its separately allocated position data.
+ */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/*
+ * State passed to gettoken_tsvector().
+ * NOTE(review): field meanings below are inferred from the caller and the
+ * names - confirm against gettoken_tsvector().
+ */
+typedef struct
+{
+   char       *prsbuf;     /* presumably the input being scanned */
+   char       *word;       /* buffer holding the current word */
+   char       *curpos;     /* write position inside word; curpos - word is the word length */
+   int4        len;        /* presumably the allocated size of word */
+   int4        state;
+   int4        alen;       /* non-zero when position data was read for the word */
+   WordEntryPos    *pos;   /* position data of the current word */
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif


diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014


--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include              /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the vector that contains only the
+ * words; all position data is dropped and haspos is cleared on every entry.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* string area of the result: every word, padded to short alignment */
+   for(i=0;i<in->size;i++) 
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   /* zero the result so that alignment padding is deterministic */
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   /* copy the words one by one, recomputing their offsets */
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char"): return a copy of the vector in which every
+ * stored position carries the requested weight.
+ *
+ * The weight letter is case-insensitive: 'a' -> 3, 'b' -> 2, 'c' -> 1,
+ * 'd' -> 0.  Any other character raises ERROR "Unknown weight".
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /*
+    * tolower() is only defined for values representable as unsigned char
+    * (or EOF); a plain char may be negative, so cast first.
+    */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   /* work on a byte-for-byte copy of the input */
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;    
+   while(i--) {
+       /* j = number of positions of this entry (0 when it has none) */
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+       
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two entries that may live in different vectors; ptra and ptrb are
+ * the string areas the entries' offsets refer to.  Shorter words sort
+ * first; words of equal length are ordered by strncmp over that length.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+        if ( a->len != b->len )
+                return ( a->len > b->len ) ? 1 : -1;
+
+        return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of entry srcptr (in vector src) to the position
+ * list of entry destptr (in vector dest), shifting every position by maxpos
+ * and clamping it with LIMITPOS.  Weights are copied unchanged.
+ *
+ * Copying stops when the source is exhausted, when the destination holds
+ * MAXNUMPOS positions, or when the last stored position already equals the
+ * clamp value MAXENTRYPOS-1.
+ *
+ * Sets destptr->haspos when at least one position was added and returns
+ * the number of positions added.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   /* destination has no list yet: start its count at zero */
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+
+/*
+ * concat(tsvector, tsvector): merge two vectors into one.
+ *
+ * The entries of both inputs are merged in compareEntry() order.  Positions
+ * taken from the first vector are copied as they are; positions taken from
+ * the second vector are shifted by the largest position found in the first
+ * vector (see add_pos()).  A word present in both inputs produces a single
+ * entry carrying the positions of both.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   /* find the largest position stored in in1 */
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) {
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   /*
+    * Allocate for the worst case (no common words).  size is set to the
+    * sum for now, which fixes where STRPTR(out) points while merging; both
+    * size and len are corrected after the merge.
+    */
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   /* merge while both inputs still have entries */
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               /* copy the count and the positions verbatim */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) {
+               /* positions from in2 are shifted by maxpos */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else {
+           /* same word in both inputs: one entry, positions of both */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   /* the count was already copied, so only positions are added */
+                   if ( ptr2->haspos )
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   /* leftover entries of in1 */
+   while(i1) {
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   /* leftover entries of in2 */
+   while(i2) {
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   
+   /*
+    * Fix up the header with the real entry count; if that moved the start
+    * of the string area, slide the string data down to its new place.
+    */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+


diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145


--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+-- Uninstall script for tsearch2; everything runs in a single transaction.
+BEGIN;
+
+--Be careful !!!
+--script drops all indices, triggers and columns with types defined
+--in tsearch2.sql
+
+
+-- GiST operator class for tsvector (CASCADE also removes dependent objects)
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+
+-- operators on tsvector / tsquery
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+
+-- tables of the module
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+
+-- data types; CASCADE removes whatever depends on them
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+
+-- remaining functions, dropped one by one
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+-- the trigger function; CASCADE also drops triggers that use it
+DROP FUNCTION tsearch2() CASCADE;
+
+END;


diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+
+/*
+ * Human-readable description of each token type.  The array is indexed by
+ * the token type number from deflex.h (index 0 is an empty placeholder), so
+ * the order here must follow the numbering there.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+
+/*
+ * Short alias of each token type, in the same order as lex_descr[] and the
+ * token type numbers in deflex.h (index 0 is an empty placeholder).
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+


diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9


--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Token type numbers returned by the lexer.
+ *
+ * Remember: LASTNUM equals the highest token type number defined below
+ * (currently HTMLENTITY); presumably it has to be updated, together with
+ * the lex_descr[] and tok_alias[] tables, whenever a type is added.
+ */
+#define LASTNUM        23
+
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/*
+ * Interface of the flex-generated word parser.
+ *
+ * token / tokenlen: text and length of the token most recently returned by
+ * tsearch2_yylex().
+ * NOTE(review): these two are definitions, not "extern" declarations, so
+ * every file including this header gets its own tentative definition; token
+ * is additionally defined with an initializer in parser.l - confirm this is
+ * intended.
+ * NOTE(review): FILE is used below but this header does not include
+ * <stdio.h> itself.
+ */
+char      *token;
+int            tokenlen;
+/* scan the next token; returns its token type number (see deflex.h) */
+int            tsearch2_yylex(void);
+/* start scanning the given number of bytes of an in-memory string */
+void       start_parse_str(char *, int);
+/* start scanning from a file handle; the int is a read limit (0 = no limit) */
+void       start_parse_fh(FILE *, int);
+/* release the scanner buffer and any saved hyphenated word */
+void       end_parse(void);
+
+#endif


diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5


--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/* Avoid exit() on fatal scanner errors */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* postgres allocation function */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* pointer to token */
+char *s     = NULL;  /* to return WHOLE hyphenated-word */
+
+YY_BUFFER_STATE buf = NULL; /* buffer to parse; it need for parse from string */
+
+int lrlimit = -1;  /* for limiting read from filehandle ( -1 - unlimited read ) */
+int bytestoread = 0;   /* for limiting read from filehandle */
+
+/*
+ * Redefine flex's YY_INPUT so that reads from a file handle can be limited
+ * in length.
+ *
+ * Interactive buffers are read character by character up to a newline.
+ * Otherwise lrlimit decides how much may still be read: 0 means the limit
+ * is used up (report end of input), a positive value is the number of bytes
+ * still allowed (at most max_size are taken per call and subtracted from
+ * lrlimit), and a negative value means unlimited (read max_size).
+ *
+ * Note on layout: the "else" below has no braces, so it covers only the
+ * "bytestoread = max_size" assignment; the fread() that follows runs in
+ * both the limited and the unlimited case despite its indentation.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+                int c = '*', n; \
+                for ( n = 0; n < max_size && \
+                             (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+                        buf[n] = (char) c; \
+                if ( c == '\n' ) \
+                        buf[n++] = (char) c; \
+                if ( c == EOF && ferror( tsearch2_yyin ) ) \
+                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                result = n; \
+        }  else { \
+       if ( lrlimit == 0 ) \
+           result=YY_NULL; \
+       else { \
+           if ( lrlimit>0 ) { \
+               bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+               lrlimit -= bytestoread; \
+           } else \
+               bytestoread = max_size; \
+               if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+                       && ferror( tsearch2_yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       } \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/*
+ * Clean up after a parse: free the saved copy of a hyphenated word (if
+ * any) and delete the scanner buffer.
+ */
+void end_parse() {
+   if (s != NULL) {
+       free(s);
+       s = NULL;
+   }
+
+   tsearch2_yy_delete_buffer(buf);
+   buf = NULL;
+}
+
+/*
+ * Start parsing from a string: str and limit are handed to
+ * tsearch2_yy_scan_bytes() and the scanner is reset to its INITIAL state.
+ */
+void start_parse_str(char* str, int limit) {
+   /* release whatever a previous parse left behind */
+   if (buf != NULL)
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes(str, limit);
+   tsearch2_yy_switch_to_buffer(buf);
+   BEGIN INITIAL;
+}
+
+/*
+ * Start parsing from a file handle.  A non-zero limit becomes the read
+ * limit (lrlimit); a limit of 0 selects unlimited reading (-1).
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   /* release whatever a previous parse left behind */
+   if (buf != NULL)
+       end_parse();
+
+   if (limit)
+       lrlimit = limit;
+   else
+       lrlimit = -1;
+
+   buf = tsearch2_yy_create_buffer(fh, YY_BUF_SIZE);
+   tsearch2_yy_switch_to_buffer(buf);
+   BEGIN INITIAL;
+}
+
+


diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c


--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;
+static Oid current_parser_id=InvalidOid;
+
+/*
+ * Fill *prs with the description of the parser whose pg_ts_parser row has
+ * the given oid.
+ *
+ * *prs is zeroed first. The row is read through SPI using a plan that is
+ * prepared and saved on first use (plan_getparser). Result columns 1, 2, 3
+ * and 5 (prs_start, prs_nexttoken, prs_end, prs_headline) are function oids
+ * that are resolved with fmgr_info_cxt() in TopMemoryContext; column 4
+ * (prs_lextype) is kept as a plain oid. Failures to prepare or execute the
+ * plan, and a missing row, are reported through ts_error(ERROR, ...).
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid     argtypes[1]={ OIDOID };
+   Datum   params[1]={ ObjectIdGetDatum(id) };
+   bool    isnull;
+   int     rc;
+
+/* column n of the first result row, read as an Oid */
+#define PRS_COLUMN_OID(n) \
+   DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, (n), &isnull) )
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   if ( plan_getparser == NULL ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, argtypes ) );
+       if ( plan_getparser == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_getparser, params, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+   if ( SPI_processed > 0 ) {
+       Oid fn_oid=InvalidOid;
+
+       fn_oid = PRS_COLUMN_OID(1);
+       fmgr_info_cxt(fn_oid, &(prs->start_info), TopMemoryContext);
+
+       fn_oid = PRS_COLUMN_OID(2);
+       fmgr_info_cxt(fn_oid, &(prs->getlexeme_info), TopMemoryContext);
+
+       fn_oid = PRS_COLUMN_OID(3);
+       fmgr_info_cxt(fn_oid, &(prs->end_info), TopMemoryContext);
+
+       prs->lextype = PRS_COLUMN_OID(4);
+
+       fn_oid = PRS_COLUMN_OID(5);
+       fmgr_info_cxt(fn_oid, &(prs->headline_info), TopMemoryContext);
+
+       prs->prs_id = id;
+   } else
+       ts_error(ERROR, "No parser with id %d", id);
+   SPI_finish();
+
+#undef PRS_COLUMN_OID
+}
+
+/*
+ * Per-backend cache of parser descriptions, used by findprs(),
+ * name2id_prs() and reset_prs().
+ */
+typedef struct {
+   WParserInfo *last_prs;  /* entry returned by the latest findprs() call, or NULL */
+   int     len;            /* number of entries in use in list */
+   int     reallen;        /* number of entries allocated for list */
+   WParserInfo *list;      /* realloc'ed array, kept sorted by prs_id (see findprs) */
+   SNMap       name2id_map;    /* parser name -> oid cache filled by name2id_prs() */
+} PrsList;
+
+/* the single cache instance; starts out empty */
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * Drop the whole parser cache: the name->oid map, the array of loaded
+ * parsers, and all bookkeeping fields (PList is zeroed afterwards).
+ */
+void
+reset_prs(void) {
+   freeSNMap( &PList.name2id_map );
+
+   if ( PList.list != NULL )
+       free( PList.list );
+
+   memset( &PList, 0, sizeof(PrsList) );
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly rather than subtracted: Oid is an
+ * unsigned type, so the difference of two ids converted to int has the
+ * wrong sign whenever the ids are more than INT_MAX apart, which would
+ * make the ordering inconsistent.
+ *
+ * Returns -1, 0 or 1.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida < idb )
+       return -1;
+   return ( ida > idb ) ? 1 : 0;
+}
+
+/*
+ * Return the cached description of parser `id`, loading it on first use.
+ *
+ * Lookup order: the entry returned by the previous call, then a binary
+ * search of the cache array, and finally a fresh load through init_prs().
+ * A freshly loaded entry is appended and the array re-sorted; because
+ * qsort() may move the entry, the function then calls itself to locate it.
+ */
+WParserInfo *
+findprs(Oid id) {
+   /* fast path: same parser as last time */
+   if ( PList.last_prs != NULL && PList.last_prs->prs_id == id )
+       return PList.last_prs;
+
+   /* search among the parsers loaded so far */
+   if ( PList.len != 0 ) {
+       WParserInfo probe;
+
+       probe.prs_id = id;
+       PList.last_prs = bsearch(&probe, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* not cached: grow the array when it is full (16 slots, then doubling) */
+   if ( PList.len == PList.reallen ) {
+       WParserInfo *grown;
+       int newcap;
+
+       if ( PList.reallen )
+           newcap = 2*PList.reallen;
+       else
+           newcap = 16;
+
+       grown = (WParserInfo*)realloc(PList.list, sizeof(WParserInfo)*newcap);
+       if ( grown == NULL )
+           ts_error(ERROR,"No memory");
+       PList.reallen = newcap;
+       PList.list = grown;
+   }
+
+   /* load into the first free slot, then restore the sort order */
+   PList.last_prs = &(PList.list[PList.len]);
+   init_prs(id, PList.last_prs);
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+
+   /* the sort may have moved the new entry, so look it up again */
+   return findprs(id);
+}
+
+static void *plan_name2id=NULL;
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ *
+ * The name->oid map in PList is consulted first. On a miss the oid is read
+ * through SPI with a plan prepared and saved on first use (plan_name2id),
+ * and the result is added to the map. An unknown name, or a failure to
+ * prepare or run the plan, is reported through ts_error(ERROR, ...).
+ */
+Oid
+name2id_prs(text *name) {
+   Oid     argtypes[1]={ TEXTOID };
+   Datum   params[1]={ PointerGetDatum(name) };
+   bool    isnull;
+   int     rc;
+   Oid     id;
+
+   /* cached already? */
+   id = findSNMap_t( &(PList.name2id_map), name );
+   if ( id != 0 )
+       return id;
+
+   SPI_connect();
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, params, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed > 0 )
+       id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   else
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   SPI_finish();
+
+   /* remember the answer for later calls */
+   addSNMap_t( &(PList.name2id_map), name, id );
+   return id;
+}
+
+
+/******sql-level interface******/
+/* Cross-call state of the token_type*() set-returning functions */
+typedef struct {
+   int     cur;        /* index of the next list entry to return */
+   LexDescr    *list;  /* array returned by the parser's lextype function; an entry with lexid==0 ends it */
+} TypeStorage;
+
+/*
+ * First-call initialisation shared by the token_type*() functions.
+ *
+ * Looks up parser `prsid`, then, inside the multi-call memory context,
+ * calls the parser's lextype function and stores the returned LexDescr
+ * array in a TypeStorage hung on funcctx->user_fctx. The result slot and
+ * attribute metadata are built from the "tokentype" relation.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo    *prs = findprs(prsid);
+   MemoryContext   saved_ctx;
+   TupleDesc       tupdesc;
+   TypeStorage    *st;
+   Datum           descr;
+
+   saved_ctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   st->cur = 0;
+   descr = OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) );
+   st->list = (LexDescr*)DatumGetPointer( descr );
+   funcctx->user_fctx = (void*)st;
+
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+
+   MemoryContextSwitchTo(saved_ctx);
+}
+
+/*
+ * Per-call worker shared by the token_type*() functions.
+ *
+ * Returns the next (id, alias, description) row built from the LexDescr
+ * list, freeing the alias and description strings of that entry. When the
+ * list is missing or its terminating entry (lexid==0) is reached, the
+ * storage is freed and (Datum)0 is returned.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *st=(TypeStorage*)funcctx->user_fctx;
+   LexDescr    *entry;
+   char        *values[3];
+   char         txtid[16];
+   HeapTuple    tuple;
+   Datum        result;
+
+   /* nothing (more) to return: release the state */
+   if ( !st->list || !st->list[st->cur].lexid ) {
+       if ( st->list )
+           pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = &(st->list[st->cur]);
+
+   sprintf(txtid,"%d",entry->lexid);
+   values[0] = txtid;
+   values[1] = entry->alias;
+   values[2] = entry->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   /* the strings of this entry are no longer needed */
+   pfree(values[1]);
+   pfree(values[2]);
+   st->cur++;
+
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: one row per lexeme type of the parser whose oid
+ * is passed as argument 0.
+ */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: one row per lexeme type of the parser whose name
+ * is passed as argument 0.
+ */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+       Oid   prsid;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( name );
+       setup_firstcall(funcctx, prsid );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: one row per lexeme type of the current parser.
+ * If no current parser has been chosen yet, the parser named "default" is
+ * looked up and becomes the current one.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid )
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+/*
+ * Make the parser with the given oid (argument 0) the current parser.
+ * findprs() is called first, so an unknown oid raises an error before
+ * current_parser_id is changed.
+ */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+   Oid prsid = PG_GETARG_OID(0);
+
+   findprs(prsid);
+   current_parser_id = prsid;
+   PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+/*
+ * Make the parser with the given name (argument 0) the current parser:
+ * the name is resolved with name2id_prs() and handed to set_curprs().
+ */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+   text   *name = PG_GETARG_TEXT_P(0);
+   Datum   prsid = ObjectIdGetDatum( name2id_prs(name) );
+
+   DirectFunctionCall1( set_curprs, prsid );
+
+   PG_FREE_IF_COPY(name, 0);
+   PG_RETURN_VOID();
+}
+
+/* One token produced by a parser run (see prs_setup_firstcall) */
+typedef struct {
+   int type;       /* token type id returned by the parser's getlexeme function */
+   char    *lexem; /* palloc'ed, NUL-terminated copy of the token text */
+} LexemEntry;
+
+/* Cross-call state of the parse*() set-returning functions */
+typedef struct {
+   int cur;            /* while collecting: number of tokens stored; afterwards: next token to return */
+   int len;            /* while collecting: allocated entries; afterwards: number of tokens */
+   LexemEntry  *list;  /* the collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call initialisation shared by the parse*() functions.
+ *
+ * Runs parser `prsid` over the whole of `txt` right away: the parser's
+ * start function is called with the text data and its length, its
+ * getlexeme function is called until it returns type 0, and its end
+ * function is called last. Every token is copied (NUL-terminated) into a
+ * growing LexemEntry array allocated in the multi-call memory context.
+ * The result slot and attribute metadata are built from the "tokenout"
+ * relation.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, int prsid, text *txt) {
+   WParserInfo    *prs = findprs(prsid);
+   MemoryContext   saved_ctx;
+   TupleDesc       tupdesc;
+   PrsStorage     *st;
+   char           *tok=NULL;
+   int             toklen=0;
+   int             toktype=0;
+
+   saved_ctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st = (PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur = 0;
+   st->len = 16;
+   st->list = (LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* open the parser on the text payload */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* collect tokens until the parser reports type 0 */
+   for(;;) {
+       LexemEntry *slot;
+
+       toktype = DatumGetInt32( FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&tok),
+           PointerGetDatum(&toklen)
+       ) );
+       if ( toktype == 0 )
+           break;
+
+       /* double the array when it is full */
+       if ( st->cur >= st->len ) {
+           st->len = 2*st->len;
+           st->list = (LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+
+       slot = &(st->list[st->cur]);
+       slot->lexem = palloc(toklen+1);
+       memcpy( slot->lexem, tok, toklen);
+       slot->lexem[toklen] = '\0';
+       slot->type = toktype;
+       st->cur++;
+   }
+
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* from here on len is the token count and cur the read position */
+   st->len = st->cur;
+   st->cur = 0;
+
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+
+   MemoryContextSwitchTo(saved_ctx);
+}
+
+/*
+ * Per-call worker shared by the parse*() functions.
+ *
+ * Returns the next (type id, token text) row and frees that token's text.
+ * Once all tokens have been returned, the storage is freed and (Datum)0 is
+ * returned.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *st=(PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char        *values[2];
+   char         tid[16];
+   HeapTuple    tuple;
+   Datum        result;
+
+   /* all tokens delivered: release the state */
+   if ( !( st->cur < st->len ) ) {
+       if ( st->list )
+           pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = &(st->list[st->cur]);
+
+   sprintf(tid,"%d",entry->type);
+   values[0] = tid;
+   values[1] = entry->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[1]);
+   st->cur++;
+
+   return result;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: tokens of the text in argument 1 as produced by
+ * the parser whose oid is argument 0.
+ */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: tokens of the text in argument 1 as produced by
+ * the parser whose name is argument 0.
+ */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+       text *txt = PG_GETARG_TEXT_P(1);
+       Oid   prsid;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prsid = name2id_prs( name );
+       prs_setup_firstcall(funcctx, prsid, txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: tokens of the text in argument 0 as produced by
+ * the current parser. If no current parser has been chosen yet, the parser
+ * named "default" is looked up and becomes the current one.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid )
+           current_parser_id = name2id_prs( char2text("default") );
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = prs_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+/*
+ * Build a headline for a text.
+ *
+ * Arguments: 0 - configuration oid, 1 - source text, 2 - query,
+ * 3 - optional options text (treated as absent when the function is
+ * called with three arguments or the pointer is NULL).
+ *
+ * The text is parsed with hlparsetext(), the headline function of the
+ * configuration's parser marks the words to show, and genhl() produces the
+ * resulting text.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo   *cfg;
+   text        *in;
+   QUERYTYPE   *query;
+   text        *opt=NULL;
+   WParserInfo *prsobj;
+   HLPRSTEXT    prs;
+   text        *out;
+
+   cfg = findcfg(PG_GETARG_OID(0));
+   in = PG_GETARG_TEXT_P(1);
+   query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   if ( PG_NARGS()>3 && PG_GETARG_POINTER(3) )
+       opt = PG_GETARG_TEXT_P(3);
+   prsobj = findprs(cfg->prs_id);
+
+   /* split the text into words; the array starts with room for 32 */
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+   /* let the parser's headline function choose what to show */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt )
+       PG_FREE_IF_COPY(opt,3);
+   pfree(prs.words);
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+/*
+ * Same as headline(), with the configuration given by name (argument 0).
+ * The remaining arguments are passed through; a missing fourth argument is
+ * passed as a NULL pointer.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text   *cfg = PG_GETARG_TEXT_P(0);
+   Datum   cfgid = ObjectIdGetDatum(name2id_cfg( cfg ) );
+   Datum   opt;
+   Datum   out;
+
+   if ( PG_NARGS()>3 )
+       opt = PG_GETARG_DATUM(3);
+   else
+       opt = PointerGetDatum(NULL);
+
+   out = DirectFunctionCall4(
+       headline,
+       cfgid,
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       opt
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+/*
+ * Same as headline(), using the configuration returned by get_currcfg().
+ * Arguments: 0 - text, 1 - query, 2 - optional options (a missing third
+ * argument is passed on as a NULL pointer).
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum   opt;
+   Datum   out;
+
+   if ( PG_NARGS()>2 )
+       opt = PG_GETARG_DATUM(2);
+   else
+       opt = PointerGetDatum(NULL);
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       opt
+   );
+
+   PG_RETURN_DATUM(out);
+}
+
+
+


diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56


--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+/* Cached description of one parser, filled by init_prs() */
+typedef struct {
+   Oid prs_id;                 /* oid of the parser's pg_ts_parser row */
+   FmgrInfo start_info;        /* resolved prs_start function */
+   FmgrInfo getlexeme_info;    /* resolved prs_nexttoken function */
+   FmgrInfo end_info;          /* resolved prs_end function */
+   FmgrInfo headline_info;     /* resolved prs_headline function */
+   Oid lextype;                /* oid of the prs_lextype function (called by oid) */
+   void *prs;                  /* value returned by the start function, passed to the other calls */
+} WParserInfo;
+
+/* load parser `id` from pg_ts_parser into *prs */
+void init_prs(Oid id, WParserInfo *prs);
+/* cached lookup by oid; loads the parser on first use */
+WParserInfo* findprs(Oid id);
+/* cached lookup of a parser's oid by name */
+Oid name2id_prs(text *name);
+/* drop all cached parser information */
+void   reset_prs(void);
+
+
+/* One lexeme type as reported by a parser's lextype function */
+typedef struct {
+   int lexid;      /* type id; 0 marks the end of an array of LexDescr */
+   char    *alias; /* short name of the type */
+   char    *descr; /* description of the type */
+} LexDescr;
+
+#endif


diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03


--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * Describe the lexeme types of the default parser.
+ *
+ * Returns a palloc'ed array of LASTNUM+1 LexDescr: entries for type ids
+ * 1..LASTNUM with copies of tok_alias[id] and lex_descr[id], followed by a
+ * terminating entry whose lexid is 0.
+ */
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *descr;
+   LexDescr *cur;
+   int       id;
+
+   descr = (LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+
+   cur = descr;
+   for(id=1; id<=LASTNUM; id++, cur++) {
+       cur->lexid = id;
+       cur->alias = pstrdup(tok_alias[id]);
+       cur->descr = pstrdup(lex_descr[id]);
+   }
+
+   /* end-of-list marker */
+   descr[LASTNUM].lexid = 0;
+
+   PG_RETURN_POINTER(descr);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+/*
+ * Start function of the default parser: begins scanning the buffer in
+ * argument 0 whose length is argument 1. Always returns a NULL pointer.
+ */
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char   *str = (char*)PG_GETARG_POINTER(0);
+   int     limit = PG_GETARG_INT32(1);
+
+   start_parse_str( str, limit );
+   PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+/*
+ * Next-token function of the default parser.
+ *
+ * Argument 0 (parser state) is not used. Runs the scanner once, stores the
+ * token pointer through argument 1 (char **) and its length through
+ * argument 2 (int *), and returns the token type.
+ */
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char  **tok_out = (char**)PG_GETARG_POINTER(1);
+   int    *len_out = (int*)PG_GETARG_POINTER(2);
+   int     type;
+
+   type = tsearch2_yylex();
+
+   *tok_out = token;
+   *len_out = tokenlen;
+   PG_RETURN_INT32(type);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+/*
+ * End function of the default parser: releases the scanner state.
+ * Argument 0 (parser state) is not used.
+ */
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   end_parse();
+
+   PG_RETURN_VOID();
+}
+
+/*
+ * Token-class predicates used by the headline code below. The argument is
+ * a token type id as stored in HLWORD.type.
+ * NOTE(review): which lexeme class each literal number denotes is defined
+ * elsewhere (presumably wordparser/deflex.h) -- confirm before changing.
+ * LEAVETOKEN, COMPLEXTOKEN and ENDPUNCTOKEN are not referenced in the
+ * visible part of this file.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* types whose words get the `replace` flag when shown in a headline */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* types that do not count as words when measuring headline length */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* types a headline should not end on */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/* A window of headline words handed to checkcondition_HL() */
+typedef struct {
+   HLWORD  *words;
+   int len;
+} hlCheck;
+
+/*
+ * Callback for TS_execute(): true when query item `val` is the item of at
+ * least one word inside the window described by `checkval` (an hlCheck).
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window = (hlCheck*)checkval;
+   int      idx;
+
+   for(idx=0; idx < window->len; idx++)
+       if ( window->words[idx].item == val )
+           return true;
+
+   return false;
+}
+
+
+/*
+ * Find the next "cover": a range of words [*p, *q] on which the query
+ * evaluates to true.
+ *
+ * On entry *p is the word index to start from. On success the function
+ * returns true with *p and *q set to the first and last word of the cover.
+ * It returns false when no further cover exists.
+ *
+ * Steps:
+ *   1. for every VAL item of the query, find its first occurrence at or
+ *      after the start position; *q becomes the largest such index;
+ *   2. for every VAL item, find its last occurrence between the start
+ *      position and *q; *p becomes the smallest such index;
+ *   3. evaluate the query on words [*p, *q]; if it does not match, retry
+ *      from *p + 1.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* step 1: right edge */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q) 
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   /* no query word found past index 0 */
+   if ( *q==0 )
+       return false;
+
+   /* step 2: left edge */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }   
+
+   /* step 3: does the query really hold on this range? */
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { 
+           return true;
+       } else {
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+/*
+ * Headline function of the default parser.
+ *
+ * Arguments: 0 - HLPRSTEXT with the parsed text, 1 - options text or NULL,
+ * 2 - query. Recognised options (case-insensitive keys): MaxWords,
+ * MinWords, ShortWord, StartSel, StopSel. Defaults are 35, 15, 3, "<b>"
+ * and "</b>".
+ *
+ * Every cover reported by hlCover() is stretched or shrunk towards the
+ * MinWords..MaxWords range, preferring to stop on a word that is neither a
+ * NOENDTOKEN type nor ShortWord or fewer characters long. The candidate
+ * containing the most distinct query words wins. If the text has no cover
+ * at all, the first MinWords words are used. The words of the chosen range
+ * are flagged (in / selected / skip / replace) and the same prs pointer is
+ * returned.
+ */
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* from opt + start and and tag */
+   int min_words=15;   
+   int max_words=35;   
+   int shortword=3;    
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+       
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+               
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++; 
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) { 
+           /* a better candidate is already known, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* cover is too long: shorten it */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this candidate if it is the first one, if it holds more query
+        * words and ends well, or if it ends well while the current best
+        * does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   /* no cover at all: show the beginning of the text */
+   if ( bestlen<0 ) {
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words;i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen range */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* default highlighting tags when the options did not set them */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("<b>");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("</b>");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+




This is the main PostgreSQL git repository.
RSS
Atom
+           nptr->nentry=POSDATALEN(txt,*ptr);
+           if ( nptr->nentry==0 )
+               nptr->nentry=1; 
+           nptr->ndoc=1;
+           nptr->len=(*ptr)->len;
+           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+           nptr->pos = curptr - STATSTRPTR(newstat);
+           curptr += nptr->len;
+           ptr++; nptr++;
+       }
+   }
+
+   return newstat;
+} 
+
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum           ts_accum(PG_FUNCTION_ARGS);
+/*
+ * Fold one tsvector (argument 1) into the running statistics (argument 0).
+ *
+ * For every word of the tsvector that is already present in the statistics
+ * ndoc is incremented by one and nentry by the word's number of positions
+ * (at least 1). Words not present yet are collected and merged in by
+ * formstat(). Returns the statistics pointer: the one passed in (or a new
+ * empty one) when nothing had to be added, otherwise the new object built
+ * by formstat(). The old object is not freed here.
+ */
+Datum 
+ts_accum(PG_FUNCTION_ARGS) {
+   tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+   tsvector  *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   WordEntry   **newentry=NULL;    /* words of txt that are not in stat yet */
+   uint32  len=0, cur=0;           /* allocated / used length of newentry */
+   StatEntry   *sptr;
+   WordEntry   *wptr;
+
+   if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ 
+       /* no statistics yet: start from an empty header-only object */
+       stat=palloc(STATHDRSIZE);
+       stat->len=STATHDRSIZE;
+       stat->size=0;
+   }
+
+   /* simple check of correctness */
+   if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
+       PG_FREE_IF_COPY(txt,1); 
+       PG_RETURN_POINTER(stat);
+   }
+
+   sptr=STATPTR(stat);
+   wptr=ARRPTR(txt);
+
+   /*
+    * Two strategies: walk both arrays in parallel when the statistics are
+    * less than 100 times larger than the tsvector, otherwise binary-search
+    * the statistics for every word of the tsvector.
+    * NOTE(review): both rely on the two arrays being ordered consistently
+    * with compareStatWord() -- presumably guaranteed by their producers.
+    */
+   if ( stat->size < 100*txt->size ) { /* merge */
+       while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+           int cmp = compareStatWord(sptr,wptr,stat,txt);
+           if ( cmp<0 ) {
+               /* statistics entry has no counterpart in txt */
+               sptr++;
+           } else if ( cmp==0 ) {
+               /* known word: update its counters */
+               int n=POSDATALEN(txt,wptr);
+   
+               if (n==0) n=1;
+               sptr->ndoc++;
+               sptr->nentry +=n ;
+               sptr++; wptr++;
+           } else {
+               /* word of txt missing from the statistics: remember it */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               wptr++; cur++;
+           }
+       }
+
+       /* statistics exhausted: all remaining words of txt are new */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           if ( cur==len )
+               newentry=SEI_realloc(newentry, &len);
+           newentry[cur]=wptr;
+           wptr++; cur++;
+       }
+   } else { /* search */
+       while( wptr-ARRPTR(txt) < txt->size ) {
+           /* the entries end where the string area begins */
+           StatEntry *StopLow = STATPTR(stat);
+           StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+           int cmp;
+
+           while (StopLow < StopHigh) {
+               sptr=StopLow + (StopHigh - StopLow) / 2;
+               cmp =  compareStatWord(sptr,wptr,stat,txt);
+               if (cmp==0) {
+                   int n=POSDATALEN(txt,wptr);
+                   if (n==0) n=1;
+                   sptr->ndoc++;
+                   sptr->nentry +=n ;
+                   break;
+               } else if ( cmp < 0 )
+                   StopLow = sptr + 1;
+               else
+                   StopHigh = sptr; 
+           }
+       
+           /* the loop above ends with StopLow < StopHigh only on a match */
+           if ( StopLow >= StopHigh ) { /* not found */
+               if ( cur==len )
+                   newentry=SEI_realloc(newentry, &len);
+               newentry[cur]=wptr;
+               cur++;
+           }
+           wptr++;
+       }   
+   }
+
+   
+   if ( cur==0 ) { /* no new words */ 
+       PG_FREE_IF_COPY(txt,1);
+       PG_RETURN_POINTER(stat);
+   }
+
+   /* build a new statistics object that also contains the new words */
+   newstat = formstat(stat, txt, newentry, cur);
+   pfree(newentry);
+   PG_FREE_IF_COPY(txt,1);
+   /* pfree(stat); */
+
+   PG_RETURN_POINTER(newstat);
+}
+
+/* Cross-call state of the statistics set-returning functions */
+typedef struct {
+   uint32  cur;        /* index of the next StatEntry to return */
+   tsvector *stat;     /* private copy of the statistics; declared as tsvector* but filled from a tsstat in ts_setup_firstcall() */
+} StatStorage;
+
+/*
+ * First-call initialisation shared by ts_accum_finish() and ts_stat().
+ *
+ * Copies the statistics object (stat->len bytes) into the multi-call
+ * memory context, stores it with a zero cursor in funcctx->user_fctx, and
+ * builds the result slot and attribute metadata from the "statinfo"
+ * relation.
+ */
+static void
+ts_setup_firstcall(FuncCallContext  *funcctx, tsstat *stat) {
+   MemoryContext   saved_ctx;
+   TupleDesc       tupdesc;
+   StatStorage    *st;
+
+   saved_ctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st = palloc( sizeof(StatStorage) );
+   st->cur = 0;
+   st->stat = palloc( stat->len );
+   memcpy(st->stat, stat, stat->len);
+   funcctx->user_fctx = (void*)st;
+
+   tupdesc = RelationNameGetTupleDesc("statinfo");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+
+   MemoryContextSwitchTo(saved_ctx);
+}
+
+
+/*
+ * Per-call worker shared by ts_accum_finish() and ts_stat().
+ *
+ * Returns the next (word, ndoc, nentry) row. Once every entry has been
+ * returned, the copied statistics and the storage are freed and (Datum)0
+ * is returned.
+ */
+static Datum
+ts_process_call(FuncCallContext  *funcctx) {
+   StatStorage *st=(StatStorage*)funcctx->user_fctx;
+   StatEntry   *entry;
+   char        *values[3];
+   char         ndoc[16];
+   char         nentry[16];
+   HeapTuple    tuple;
+   Datum        result;
+
+   /* all entries delivered: release the state */
+   if ( !( st->cur < st->stat->size ) ) {
+       pfree(st->stat);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = STATPTR(st->stat) + st->cur;
+
+   sprintf(ndoc,"%d",entry->ndoc);
+   values[1] = ndoc;
+   sprintf(nentry,"%d",entry->nentry);
+   values[2] = nentry;
+
+   /* the word is stored without a terminator, so make a C string of it */
+   values[0] = palloc( entry->len+1 );
+   memcpy( values[0], STATSTRPTR(st->stat)+entry->pos, entry->len);
+   (values[0])[entry->len] = '\0';
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   pfree(values[0]);
+   st->cur++;
+
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum           ts_accum_finish(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: one row per word of the statistics object passed
+ * as argument 0.
+ */
+Datum 
+ts_accum_finish(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = ts_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+/* oid of the tsvector type; looked up once by get_ti_Oid() */
+static Oid tiOid=InvalidOid;
+/*
+ * Look up the oid of the tsvector type in pg_type and store it in tiOid.
+ * Must be called with SPI connected. Raises an error if the query fails,
+ * if no such type exists, or if the oid found is InvalidOid.
+ */
+static void 
+get_ti_Oid(void) {
+   int ret;
+   bool isnull; 
+
+   if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )   
+       elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+   /*
+    * An empty result means the type is missing. The row count can never be
+    * negative, so test for "fewer than one row" before touching vals[0].
+    */
+   if ( SPI_processed<1 )
+       elog(ERROR, "There is no tsvector type");
+   tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   if ( tiOid==InvalidOid )
+       elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * Run the SQL query given in `txt` and accumulate word statistics over its
+ * result.
+ *
+ * The query must return exactly one column of type tsvector. Rows are read
+ * through a cursor in batches of 100 and every non-null value is folded
+ * into the statistics with ts_accum(). Must be called with SPI connected.
+ * Returns the accumulated statistics object.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+   char *query=text2char(txt);
+   int i;
+   tsstat *newstat,*stat;
+   bool isnull;
+   Portal portal;
+   void    *plan;
+
+   if ( tiOid==InvalidOid ) 
+       get_ti_Oid();
+
+   if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+       elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+   if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+       elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+   SPI_cursor_fetch(portal, true, 100);
+
+   /* validate the shape of the result on the first batch */
+   if ( SPI_tuptable->tupdesc->natts != 1 )
+       elog(ERROR, "Number of fields doesn't equal to 1");
+
+   if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+       elog(ERROR, "Column isn't of tsvector type");
+
+   /* start from empty statistics */
+   stat=palloc(STATHDRSIZE);
+   stat->len=STATHDRSIZE;
+   stat->size=0;
+
+   while(SPI_processed>0) {
+       for(i=0;i<SPI_processed;i++) {
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               /* ts_accum returns a new object when words were added */
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+/*
+ * Set-returning function: word statistics over the result of the SQL query
+ * given as text in argument 0. The statistics are computed completely on
+ * the first call, inside one SPI connection.
+ */
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum             row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text    *txt = PG_GETARG_TEXT_P(0);
+       tsstat  *stat;
+
+       funcctx = SRF_FIRSTCALL_INIT();
+
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0);
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+
+   row = ts_process_call(funcctx);
+   if ( row == (Datum)0 )
+       SRF_RETURN_DONE(funcctx);
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+           Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+           if ( !isnull ) {
+               newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                   ts_accum,
+                   PointerGetDatum(stat),
+                   data
+               ));
+               if ( stat!=newstat && stat )
+                   pfree(stat);
+               stat=newstat;
+           }
+       } 
+
+       SPI_freetuptable(SPI_tuptable);
+       SPI_cursor_fetch(portal, true, 100);        
+   }   
+
+   SPI_freetuptable(SPI_tuptable);
+   SPI_cursor_close(portal);
+   SPI_freeplan(plan);
+   pfree(query);
+
+   return stat;    
+}
+
+/*
+ * NOTE(review): this definition repeats the ts_stat() that appears earlier
+ * in this text; it looks like a duplicated page fragment rather than a
+ * second function -- confirm against the original ts_stat.c.
+ *
+ * Set-returning function: word statistics over the result of the SQL query
+ * given as text in argument 0.
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum           ts_stat(PG_FUNCTION_ARGS);
+Datum 
+ts_stat(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum result;
+
+   if (SRF_IS_FIRSTCALL()) {
+       tsstat *stat;
+       text    *txt=PG_GETARG_TEXT_P(0);
+   
+       funcctx = SRF_FIRSTCALL_INIT();
+       /* the whole statistics run happens inside one SPI connection */
+       SPI_connect();
+       stat = ts_stat_sql(txt);
+       PG_FREE_IF_COPY(txt,0); 
+       ts_setup_firstcall(funcctx, stat );
+       SPI_finish();
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   /* (Datum)0 from ts_process_call() means there are no more rows */
+   if (  (result=ts_process_call(funcctx)) != (Datum)0 )
+       SRF_RETURN_NEXT(funcctx, result);
+   SRF_RETURN_DONE(funcctx);
+}
+
+
diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h

new file mode 100644 (file)

index 0000000..c32b17a
--- /dev/null
+++ b/contrib/tsearch2/ts_stat.h
@@ -0,0 +1,32 @@
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One per-word record stored inside a tsstat value (see STATPTR).
+ * NOTE(review): field meanings below are inferred from names and from the
+ * SQL type statinfo(word, ndoc, nentry); confirm against ts_stat.c.
+ */
+typedef struct {
+   /* presumably the byte length of the word */
+   uint32  len;
+   /* presumably the word's offset inside the string area (STATSTRPTR) */
+   uint32  pos;
+   /* presumably the number of documents containing the word */
+   uint32  ndoc;   
+   /* presumably the total number of occurrences of the word */
+   uint32  nentry; 
+}  StatEntry;
+
+/*
+ * Variable-length statistics container.  After the two-field header
+ * (STATHDRSIZE) comes an array of `size` StatEntry records (STATPTR),
+ * followed by the string area (STATSTRPTR).
+ */
+typedef struct {
+   /* total length of the value in bytes, as used by STATSTRSIZE */
+   int4        len;
+   /* number of StatEntry records that follow the header */
+   int4        size;
+   /* start of the variable-length payload */
+   char        data[1];
+}  tsstat;
+
+/*
+ * Layout helpers for tsstat values.  Every macro argument is parenthesized
+ * so that expressions such as CALCSTATSIZE(n + 1, len) expand correctly, and
+ * the header fields are read through tsstat (the type declared above), not
+ * through an unrelated type that merely shares the same header layout.
+ */
+/* size of the fixed header: the len and size fields */
+#define STATHDRSIZE (sizeof(int4)*2)
+/* bytes needed for x entries plus lenstr bytes of string data */
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+/* pointer to the first StatEntry */
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+/* pointer to the string area that follows the StatEntry array */
+#define STATSTRPTR(x)  ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+/* number of bytes in the string area */
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif
diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in

new file mode 100644 (file)

index 0000000..91ffbc8
--- /dev/null
+++ b/contrib/tsearch2/tsearch.sql._in
@@ -0,0 +1,674 @@
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+   dict_name   text not null primary key,
+   dict_init   oid,
+   dict_initoption text,
+   dict_lexize oid not null,
+   dict_comment    text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+   returns _text
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as 'MODULE_PATHNAME', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curdict_byname'
+   language 'C'
+   with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'simple', 
+   (select oid from pg_proc where proname='dex_init'),
+   null,
+   (select oid from pg_proc where proname='dex_lexize'),
+   'Simple example of dictionary.'
+;
+    
+CREATE FUNCTION snb_en_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'en_stem', 
+   (select oid from pg_proc where proname='snb_en_init'),
+   'DATA_PATH/english.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+insert into pg_ts_dict select 
+   'ru_stem', 
+   (select oid from pg_proc where proname='snb_ru_init'),
+   'DATA_PATH/russian.stop',
+   (select oid from pg_proc where proname='snb_lexize'),
+   'Russian Stemmer. Snowball.'
+;
+    
+CREATE FUNCTION spell_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'ispell_template', 
+   (select oid from pg_proc where proname='spell_init'),
+   null,
+   (select oid from pg_proc where proname='spell_lexize'),
+   'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+   returns internal
+   as 'MODULE_PATHNAME' 
+   language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_dict select 
+   'synonym', 
+   (select oid from pg_proc where proname='syn_init'),
+   null,
+   (select oid from pg_proc where proname='syn_lexize'),
+   'Example of synonym dictionary'
+;
+
+--parser conf
+CREATE TABLE pg_ts_parser (
+   prs_name    text not null primary key,
+   prs_start   oid not null,
+   prs_nexttoken   oid not null,
+   prs_end     oid not null,
+   prs_headline    oid not null,
+   prs_lextype oid not null,
+   prs_comment text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+   as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+   returns setof tokentype
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type(text)
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION token_type()
+   returns setof tokentype
+   as 'MODULE_PATHNAME', 'token_type_current'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curprs_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE TYPE tokenout 
+   as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_byname'
+   language 'C'
+   with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+   returns setof tokenout
+   as 'MODULE_PATHNAME', 'parse_current'
+   language 'C'
+   with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+   returns int4
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+   returns internal
+   as 'MODULE_PATHNAME'
+   language 'C';
+
+insert into pg_ts_parser select
+   'default',
+   (select oid from pg_proc where proname='prsd_start'),   
+   (select oid from pg_proc where proname='prsd_getlexeme'),   
+   (select oid from pg_proc where proname='prsd_end'), 
+   (select oid from pg_proc where proname='prsd_headline'),
+   (select oid from pg_proc where proname='prsd_lextype'),
+   'Parser from OpenFTS v0.34'
+;  
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+   ts_name     text not null primary key,
+   prs_name    text not null,
+   locale      text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+   ts_name     text not null,
+   tok_alias   text not null,
+   dict_name   text[],
+   primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+   returns void
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+   returns void
+   as 'MODULE_PATHNAME', 'set_curcfg_byname'
+   language 'C'
+   with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+   returns oid
+   as 'MODULE_PATHNAME'
+   language 'C'
+   with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default    lword   {en_stem}
+default    nlword  {simple}
+default    word    {simple}
+default    email   {simple}
+default    url {simple}
+default    host    {simple}
+default    sfloat  {simple}
+default    version {simple}
+default    part_hword  {simple}
+default    nlpart_hword    {simple}
+default    lpart_hword {en_stem}
+default    hword   {simple}
+default    lhword  {en_stem}
+default    nlhword {simple}
+default    uri {simple}
+default    file    {simple}
+default    float   {simple}
+default    int {simple}
+default    uint    {simple}
+default_russian    lword   {en_stem}
+default_russian    nlword  {ru_stem}
+default_russian    word    {ru_stem}
+default_russian    email   {simple}
+default_russian    url {simple}
+default_russian    host    {simple}
+default_russian    sfloat  {simple}
+default_russian    version {simple}
+default_russian    part_hword  {simple}
+default_russian    nlpart_hword    {ru_stem}
+default_russian    lpart_hword {en_stem}
+default_russian    hword   {ru_stem}
+default_russian    lhword  {en_stem}
+default_russian    nlhword {ru_stem}
+default_russian    uri {simple}
+default_russian    file    {simple}
+default_russian    float   {simple}
+default_russian    int {simple}
+default_russian    uint    {simple}
+simple lword   {simple}
+simple nlword  {simple}
+simple word    {simple}
+simple email   {simple}
+simple url {simple}
+simple host    {simple}
+simple sfloat  {simple}
+simple version {simple}
+simple part_hword  {simple}
+simple nlpart_hword    {simple}
+simple lpart_hword {simple}
+simple hword   {simple}
+simple lhword  {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file    {simple}
+simple float   {simple}
+simple int {simple}
+simple uint    {simple}
+\.
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS 'MODULE_PATHNAME', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+--Relevance ranking
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS 'MODULE_PATHNAME', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS 'MODULE_PATHNAME', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+   as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+   returns setof statinfo
+   as 'MODULE_PATHNAME', 'ts_stat'
+   language 'C'
+   with (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as 'MODULE_PATHNAME'
+        language 'C'
+        with (isstrict);
+
+
+--example of ISpell dictionary (pg_ts_dict rows are identified by dict_name)
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+END;
diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c

new file mode 100644 (file)

index 0000000..ff0794d
--- /dev/null
+++ b/contrib/tsearch2/tsvector.c
@@ -0,0 +1,804 @@
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * a string of lexeme values, plus an array giving the position of each
+ * lexeme in that string and its length
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include              /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum      tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum      tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum      to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum      to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum      to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum      tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum      tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending position.
+ *
+ * Equal positions compare as 0: a comparator that reports "greater" for
+ * equal keys is inconsistent (cmp(a,b) and cmp(b,a) both positive) and
+ * gives qsort() undefined behavior.  uniquePos() merges equal positions
+ * regardless of their relative order, so nothing depends on it.
+ */
+static int 
+comparePos(const void *a, const void *b) {
+   if ( ((WordEntryPos *) a)->pos == ((WordEntryPos *) b)->pos )
+       return 0;
+   return ( ((WordEntryPos *) a)->pos > ((WordEntryPos *) b)->pos ) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a[] and squeeze out duplicates in place.
+ *
+ * When the same position occurs more than once, the surviving entry keeps
+ * the highest weight seen.  Compaction stops early once MAXNUMPOS entries
+ * have been kept or a kept position equals MAXENTRYPOS-1.
+ *
+ * Returns the number of entries left at the front of a[].  Arrays of zero
+ * or one element are returned unchanged (the old l==1 test let an empty
+ * array fall through and be reported as holding one element).
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+   WordEntryPos *ptr, *res;
+
+   if ( l <= 1 )
+       return l;
+
+   res=a;
+   qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+   ptr = a + 1;
+   while (ptr - a < l) {
+       if ( ptr->pos != res->pos ) {
+           /* new distinct position: append it to the compacted prefix */
+           res++;
+           res->pos = ptr->pos;
+           res->weight = ptr->weight;
+           if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
+               break;
+       } else if ( ptr->weight > res->weight )
+           /* duplicate position: remember the strongest weight */
+           res->weight = ptr->weight;
+       ptr++;
+   }
+   return res + 1 - a;
+}
+
+/* string buffer that the entry.pos offsets seen by compareentry() refer to */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN items: shorter lexemes sort first,
+ * lexemes of equal length are ordered by strncmp() over their bytes in
+ * BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+   const WordEntryIN *lhs = (const WordEntryIN *) a;
+   const WordEntryIN *rhs = (const WordEntryIN *) b;
+
+   if ( lhs->entry.len != rhs->entry.len )
+       return ( lhs->entry.len > rhs->entry.len ) ? 1 : -1;
+
+   return strncmp(&BufferStr[lhs->entry.pos],
+                  &BufferStr[rhs->entry.pos],
+                  lhs->entry.len);
+}
+
+/*
+ * Sort the l parsed entries in a[], merge entries holding the same lexeme
+ * (concatenating their position lists) and de-duplicate every position list.
+ *
+ * a         - entries; entry.pos/entry.len address the lexeme inside buf,
+ *             and, when entry.haspos is set, pos[] holds the position count
+ *             in slot 0 (read as uint16) followed by the positions
+ * l         - number of entries in a[]
+ * buf       - buffer holding the lexeme strings
+ * outbuflen - output: number of data bytes tsvector_in() will copy for the
+ *             surviving entries, i.e. for each entry SHORTALIGN(len) plus,
+ *             if it has positions, (count + 1) * sizeof(WordEntryPos)
+ *
+ * Returns the number of distinct entries left at the front of a[].
+ *
+ * The size is computed from zero.  Previously it was accumulated on top of
+ * whatever the caller passed in, the count slot was left out for all but the
+ * single-entry case, and a single entry without positions left the value
+ * untouched, so the result was only safe thanks to accidental slack.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+   WordEntryIN  *ptr,
+              *res;
+
+   *outbuflen = 0;
+   if ( l <= 0 )
+       return 0;
+
+   res = a;
+   if ( l > 1 )
+   {
+       BufferStr = buf;
+       qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+
+       for (ptr = a + 1; ptr - a < l; ptr++)
+       {
+           if (!(ptr->entry.len == res->entry.len &&
+                 strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+           {
+               /* a new lexeme starts: close the accounting for the previous one */
+               if ( res->entry.haspos ) {
+                   *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+                   *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+               }
+               *outbuflen += SHORTALIGN(res->entry.len);
+               res++;
+               /* memcpy() must not be given identical source and destination */
+               if ( res != ptr )
+                   memcpy(res,ptr,sizeof(WordEntryIN));
+           } else if ( ptr->entry.haspos ){
+               /* same lexeme again: fold its positions into the kept entry */
+               if ( res->entry.haspos ) {
+                   int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
+                   res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
+                   memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]),
+                       &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
+                   *(uint16*)(res->pos) += *(uint16*)(ptr->pos);
+                   pfree( ptr->pos );
+               } else {
+                   /* kept entry had no positions yet: adopt the duplicate's list */
+                   res->entry.haspos=1;
+                   res->pos = ptr->pos;
+               }
+           }
+       }
+   }
+
+   /* account for the last surviving entry (the only one when l == 1) */
+   if ( res->entry.haspos ) {
+       *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
+       *outbuflen += (*(uint16*)(res->pos) + 1) * sizeof(WordEntryPos);
+   }
+   *outbuflen += SHORTALIGN(res->entry.len);
+
+   return res + 1 - a;
+}
+
+/* Parser states used by gettoken_tsvector() */
+/* before a lexeme: spaces are skipped, a quote or backslash starts one */
+#define WAITWORD   1
+/* inside an unquoted lexeme */
+#define WAITENDWORD 2
+/* a backslash was seen: the next character is copied literally */
+#define WAITNEXTCHAR   3
+/* inside a single-quoted lexeme, waiting for the closing quote */
+#define WAITENDCMPLX   4
+/* after the closing quote: a ':' starts the position list */
+#define WAITPOSINFO    5
+/* a digit starting the next position is required */
+#define INPOSINFO  6
+/* after the start of a position: remaining digits, a weight, ',' or end */
+#define WAITPOSDELIM   7
+
+/*
+ * Double the lexeme buffer state->word once the write cursor state->curpos
+ * is within one byte of its end, keeping the cursor at the same offset in
+ * the reallocated buffer.  Expects a variable named `state` in scope.
+ */
+#define RESIZEPRSBUF \
+do { \
+   if ( state->curpos - state->word + 1 >= state->len ) \
+   { \
+       int4 clen = state->curpos - state->word; \
+       state->len *= 2; \
+       state->word = (char*)repalloc( (void*)state->word, state->len ); \
+       state->curpos = state->word + clen; \
+   } \
+} while (0)
+
+/*
+ * Read the next lexeme, and its optional position list, from state->prsbuf.
+ *
+ * The lexeme is written NUL-terminated to state->word, with state->curpos
+ * left on the terminator.  If a ":pos[weight],..." list follows and
+ * state->oprisdelim is false, state->pos receives a palloc'd array whose
+ * slot 0 holds the count (as uint16) and state->alen its allocated size;
+ * state->alen stays 0 when there is no position list.  With oprisdelim set,
+ * operator characters and ':' terminate the lexeme instead.
+ *
+ * Returns 1 when a lexeme was read and 0 at end of input; malformed input
+ * is reported through elog(ERROR).
+ *
+ * Characters are converted to unsigned char before being handed to the
+ * <ctype.h> functions: passing a negative char (any byte >= 0x80 where
+ * char is signed) is undefined behavior.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+   int4        oldstate = 0;
+
+   state->curpos = state->word;
+   state->state = WAITWORD;
+   state->alen=0;
+
+   while (1)
+   {
+       /* current input character; the cursor advances at the loop bottom */
+       char        c = *(state->prsbuf);
+
+       if (state->state == WAITWORD)
+       {
+           if (c == '\0')
+               return 0;
+           else if (c == '\'')
+               state->state = WAITENDCMPLX;
+           else if (c == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (state->oprisdelim && ISOPERATOR(c))
+               elog(ERROR, "Syntax error");
+           else if (c != ' ')
+           {
+               *(state->curpos) = c;
+               state->curpos++;
+               state->state = WAITENDWORD;
+           }
+       }
+       else if (state->state == WAITNEXTCHAR)
+       {
+           if (c == '\0')
+               elog(ERROR, "There is no escaped character");
+           else
+           {
+               /* escaped character: copy verbatim, resume the previous state */
+               RESIZEPRSBUF;
+               *(state->curpos) = c;
+               state->curpos++;
+               state->state = oldstate;
+           }
+       }
+       else if (state->state == WAITENDWORD)
+       {
+           if (c == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDWORD;
+           }
+           else if (c == ' ' || c == '\0' ||
+                    (state->oprisdelim && ISOPERATOR(c)))
+           {
+               /* end of lexeme; the delimiter itself is left unconsumed */
+               RESIZEPRSBUF;
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               return 1;
+           }
+           else if (c == ':')
+           {
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               *(state->curpos) = '\0';
+               if ( state->oprisdelim )
+                   return 1;
+               else
+                   state->state = INPOSINFO;
+           }
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = c;
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITENDCMPLX)
+       {
+           if (c == '\'')
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = '\0';
+               if (state->curpos == state->word)
+                   elog(ERROR, "Syntax error");
+               if ( state->oprisdelim )
+               {
+                   /* step over the closing quote before returning */
+                   state->prsbuf++;
+                   return 1;
+               }
+               else
+                   state->state = WAITPOSINFO;
+           }
+           else if (c == '\\')
+           {
+               state->state = WAITNEXTCHAR;
+               oldstate = WAITENDCMPLX;
+           }
+           else if (c == '\0')
+               elog(ERROR, "Syntax error");
+           else
+           {
+               RESIZEPRSBUF;
+               *(state->curpos) = c;
+               state->curpos++;
+           }
+       }
+       else if (state->state == WAITPOSINFO)
+       {
+           if ( c == ':' )
+               state->state=INPOSINFO;
+           else
+               return 1;
+       }
+       else if (state->state == INPOSINFO)
+       {
+           if ( isdigit((unsigned char) c) )
+           {
+               if ( state->alen==0 )
+               {
+                   state->alen=4;
+                   state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
+                   *(uint16*)(state->pos)=0;
+               }
+               else if ( *(uint16*)(state->pos) +1 >= state->alen )
+               {
+                   state->alen *= 2;
+                   state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
+               }
+               (  *(uint16*)(state->pos) )++;
+               /* atoi() reads the whole number; WAITPOSDELIM skips its other digits */
+               state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
+               if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
+                   elog(ERROR,"Wrong position info");
+               state->pos[ *(uint16*)(state->pos) ].weight = 0;
+               state->state = WAITPOSDELIM;
+           }
+           else
+               elog(ERROR,"Syntax error");
+       }
+       else if (state->state == WAITPOSDELIM)
+       {
+           int         lc = tolower((unsigned char) c);
+
+           if ( c == ',' )
+               state->state = INPOSINFO;
+           else if ( lc == 'a' || c == '*' || lc == 'b' || lc == 'c' || lc == 'd' )
+           {
+               WordEntryPos *last = &(state->pos[ *(uint16*)(state->pos) ]);
+
+               /* a weight may not follow an already assigned non-zero weight */
+               if ( last->weight )
+                   elog(ERROR,"Syntax error");
+               if ( lc == 'a' || c == '*' )
+                   last->weight = 3;
+               else if ( lc == 'b' )
+                   last->weight = 2;
+               else if ( lc == 'c' )
+                   last->weight = 1;
+               else
+                   last->weight = 0;
+           }
+           else if ( isspace((unsigned char) c) || c == '\0' )
+               return 1;
+           else if ( !isdigit((unsigned char) c) )
+               elog(ERROR,"Syntax error");
+       }
+       else
+           elog(ERROR, "Inner bug :(");
+       state->prsbuf++;
+   }
+
+   return 0;
+}
+
+/*
+ * Input function for the tsvector type.
+ *
+ * Tokenizes the cstring argument with gettoken_tsvector(), collects the
+ * lexemes in a scratch buffer, sorts and de-duplicates them with
+ * uniqueentry(), and assembles the final value: the WordEntry array
+ * followed by the lexeme strings, each optionally followed by its position
+ * list.
+ *
+ * Raises "Word is too long" / "Too long value" when a lexeme or the
+ * accumulated string data exceeds MAXSTRLEN / MAXSTRPOS.
+ *
+ * For input without any lexeme the data size is now 0; it used to stay at
+ * the 256-byte scratch size, so an empty tsvector carried 256 unused bytes.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+   char       *buf = PG_GETARG_CSTRING(0);
+   TI_IN_STATE state;
+   WordEntryIN  *arr;
+   WordEntry  *inarr;
+   int4        len = 0,
+               totallen = 64;
+   tsvector       *in;
+   char       *tmpbuf,
+              *cur;
+   int4        i,
+               buflen = 256;
+
+   state.prsbuf = buf;
+   state.len = 32;
+   state.word = (char *) palloc(state.len);
+   state.oprisdelim = false;
+
+   arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+   cur = tmpbuf = (char *) palloc(buflen);
+   while (gettoken_tsvector(&state))
+   {
+       /* validate the limits first, then grow the work areas as needed */
+       if (state.curpos - state.word >= MAXSTRLEN)
+           elog(ERROR, "Word is too long");
+       if (cur - tmpbuf > MAXSTRPOS)
+           elog(ERROR, "Too long value");
+
+       if (len >= totallen)
+       {
+           totallen *= 2;
+           arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+       }
+       while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+       {
+           int4        dist = cur - tmpbuf;
+
+           buflen *= 2;
+           tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+           cur = tmpbuf + dist;
+       }
+
+       /* append the lexeme to the scratch string buffer */
+       arr[len].entry.len= state.curpos - state.word;
+       arr[len].entry.pos=cur - tmpbuf;
+       memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+       cur += arr[len].entry.len;
+
+       /* state.alen is non-zero only when a position list was parsed */
+       if ( state.alen ) {
+           arr[len].entry.haspos=1;
+           arr[len].pos = state.pos;
+       } else
+           arr[len].entry.haspos=0;
+       len++;
+   }
+   pfree(state.word);
+
+   /* from here on buflen is the size of the result's data area */
+   if ( len > 0 )
+       len = uniqueentry(arr, len, tmpbuf, &buflen);
+   else
+       buflen = 0;
+   totallen = CALCDATASIZE(len, buflen);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = len;
+   cur = STRPTR(in);
+   inarr = ARRPTR(in);
+   for (i = 0; i < len; i++)
+   {
+       memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+       /* entry.pos now becomes the offset inside the result's string area */
+       arr[i].entry.pos=cur - STRPTR(in);
+       cur += SHORTALIGN(arr[i].entry.len);
+       if ( arr[i].entry.haspos ) {
+           /* copy the count slot together with the positions */
+           memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
+           cur +=  (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
+           pfree( arr[i].pos );
+       }
+       memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
+   }
+   pfree(tmpbuf);
+   pfree(arr);
+   PG_RETURN_POINTER(in);
+}
+
+/*
+ * SQL function length(tsvector): returns the value's size field, i.e. the
+ * number of entries it holds.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+   tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   int4        nentries;
+
+   /* read the count before the detoasted copy (if any) is released */
+   nentries = vec->size;
+   PG_FREE_IF_COPY(vec, 0);
+
+   PG_RETURN_INT32(nentries);
+}
+
+/*
+ * tsvector_out: text output function for the tsvector type.
+ *
+ * Each entry is written as 'lexeme'; entries are separated by one space.
+ * An entry that carries position data is followed by ':' and a
+ * comma-separated list of positions, each suffixed by its weight letter
+ * (3 -> A, 2 -> B, 1 -> C, 0 -> no suffix).  The result is a palloc'd,
+ * NUL-terminated string.
+ *
+ * Inside a lexeme both the quote and the backslash are written with a
+ * leading backslash.  Escaping the quote that way makes the backslash an
+ * escape character of the textual form, so a literal backslash has to be
+ * escaped too, or the printed value would not read back as the same lexeme.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+   tsvector       *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       *outbuf;
+   int4        i,
+               j,
+               lenbuf,
+               pp;
+   WordEntry  *ptr = ARRPTR(out);
+   char       *curin,
+              *curout;
+
+   /*
+    * Worst-case output size.  Every lexeme byte may need an escape (2 bytes
+    * each); every position needs at most 5 digits (pos is a 14-bit field),
+    * one weight letter and one separator.  The unused separator slot of the
+    * last position pays for the ':' that introduces the list.
+    */
+   lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */;
+   for (i = 0; i < out->size; i++)
+   {
+       lenbuf += ptr[i].len * 2 /* for escape */;
+       if (ptr[i].haspos)
+           lenbuf += 7 * POSDATALEN(out, &(ptr[i]));
+   }
+
+   curout = outbuf = (char *) palloc(lenbuf);
+   for (i = 0; i < out->size; i++)
+   {
+       curin = STRPTR(out) + ptr->pos;
+       if (i != 0)
+           *curout++ = ' ';
+       *curout++ = '\'';
+       j = ptr->len;
+       while (j--)
+       {
+           /* room for the escape byte is already part of lenbuf */
+           if (*curin == '\'' || *curin == '\\')
+               *curout++ = '\\';
+           *curout++ = *curin++;
+       }
+       *curout++ = '\'';
+       if ((pp = POSDATALEN(out, ptr)) != 0)
+       {
+           WordEntryPos *wptr;
+
+           *curout++ = ':';
+           wptr = POSDATAPTR(out, ptr);
+           while (pp)
+           {
+               sprintf(curout, "%d", wptr->pos);
+               curout = strchr(curout, '\0');
+               switch (wptr->weight)
+               {
+                   case 3:
+                       *curout++ = 'A';
+                       break;
+                   case 2:
+                       *curout++ = 'B';
+                       break;
+                   case 1:
+                       *curout++ = 'C';
+                       break;
+                   case 0:
+                   default:
+                       break;
+               }
+               if (pp > 1)
+                   *curout++ = ',';
+               pp--;
+               wptr++;
+           }
+       }
+       ptr++;
+   }
+   *curout = '\0';
+   PG_FREE_IF_COPY(out, 0);
+   PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort comparator for WORD entries.
+ *
+ * Shorter words sort first; words of equal length are ordered by strncmp
+ * over that length; identical words are ordered by ascending position.
+ * Returns 0 when both the word and the position are identical, so that
+ * compare(a,b) and compare(b,a) never both claim "less than", which
+ * qsort() requires of its comparator.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+   const WORD *wa = (const WORD *) a;
+   const WORD *wb = (const WORD *) b;
+   int         res;
+
+   if (wa->len != wb->len)
+       return (wa->len > wb->len) ? 1 : -1;
+
+   res = strncmp(wa->word, wb->word, wb->len);
+   if (res != 0)
+       return res;
+
+   if (wa->pos.pos == wb->pos.pos)
+       return 0;
+   return (wa->pos.pos > wb->pos.pos) ? 1 : -1;
+}
+
+/*
+ * Give *w a freshly allocated position list holding the single position
+ * rawpos (clamped by LIMITPOS).  apos[0] is the number of stored positions,
+ * the positions themselves start at apos[1].
+ *
+ * rawpos is passed by value because callers hand in w->pos.pos, which
+ * presumably shares storage with w->pos.apos and is overwritten here.
+ */
+static void
+initWORDpos(WORD * w, int rawpos)
+{
+   int         limited = LIMITPOS(rawpos);
+
+   w->alen = 2;
+   w->pos.apos = (uint16 *) palloc(sizeof(uint16) * w->alen);
+   w->pos.apos[0] = 1;
+   w->pos.apos[1] = limited;
+}
+
+/*
+ * Sort the l words in a[] and squeeze out duplicates in place.
+ *
+ * Every surviving word gets a position list (see initWORDpos); positions
+ * of duplicate words are appended to the list of the word that is kept,
+ * and the text of each duplicate is pfree'd.  A position is not appended
+ * when the list already holds MAXNUMPOS-1 entries, when its last entry is
+ * MAXENTRYPOS-1, or when it equals the last entry -- the array is sorted
+ * by position within a word, so this last test stops the same position
+ * being stored twice for one lexeme.
+ *
+ * Returns the number of distinct words now at the front of a[].  Callers
+ * in this file only call it with l >= 1.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+   WORD       *ptr,
+              *res;
+
+   if (l == 1)
+   {
+       initWORDpos(a, a->pos.pos);
+       return l;
+   }
+
+   qsort((void *) a, l, sizeof(WORD), compareWORD);
+
+   res = a;
+   ptr = a + 1;
+   initWORDpos(a, a->pos.pos);
+
+   while (ptr - a < l)
+   {
+       if (!(ptr->len == res->len &&
+             strncmp(ptr->word, res->word, res->len) == 0))
+       {
+           /* new distinct word: move it down to the next free slot */
+           res++;
+           res->len = ptr->len;
+           res->word = ptr->word;
+           initWORDpos(res, ptr->pos.pos);
+       }
+       else
+       {
+           /* another occurrence of *res: keep only its position */
+           uint16      newpos = LIMITPOS(ptr->pos.pos);
+           uint16      npos = res->pos.apos[0];
+
+           pfree(ptr->word);
+           if (npos < MAXNUMPOS - 1 &&
+               res->pos.apos[npos] != MAXENTRYPOS - 1 &&
+               res->pos.apos[npos] != newpos)
+           {
+               if (npos + 1 >= res->alen)
+               {
+                   res->alen *= 2;
+                   res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
+               }
+               res->pos.apos[npos + 1] = newpos;
+               res->pos.apos[0]++;
+           }
+       }
+       ptr++;
+   }
+
+   return res + 1 - a;
+}
+
+/*
+ * make value of tsvector
+ *
+ * Builds a tsvector from the words collected in *prs.  The words are
+ * first de-duplicated by uniqueWORD(), which also gives every remaining
+ * word a position array (apos[0] holds the count).  The result is laid
+ * out as header, WordEntry array, then for each word its SHORTALIGN-padded
+ * text followed, when it has positions, by a uint16 count and the
+ * WordEntryPos items (all written with weight 0).
+ *
+ * Frees each word's text and position array and prs->words itself, and
+ * returns the palloc'd tsvector.  Raises ERROR when a word's offset does
+ * not fit into the 20-bit WordEntry.pos field.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+   int4        i,j,
+               lenstr = 0,
+               totallen;
+   tsvector       *in;
+   WordEntry  *ptr;
+   char       *str,
+              *cur;
+
+   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+   /* first pass: size of the string area (texts plus position data) */
+   for (i = 0; i < prs->curwords; i++) {
+       lenstr += SHORTALIGN(prs->words[i].len);
+
+       if ( prs->words[i].alen )
+           lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+   }
+
+   totallen = CALCDATASIZE(prs->curwords, lenstr);
+   in = (tsvector *) palloc(totallen);
+   memset(in,0,totallen);
+   in->len = totallen;
+   in->size = prs->curwords;
+
+   /* second pass: fill the entry array and the string area */
+   ptr = ARRPTR(in);
+   cur = str = STRPTR(in);
+   for (i = 0; i < prs->curwords; i++)
+   {
+       ptr->len = prs->words[i].len;
+       /*
+        * MAXSTRPOS is 1<<20, one more than the 20-bit pos field can hold,
+        * so an offset equal to MAXSTRPOS must be rejected as well.
+        */
+       if (cur - str >= MAXSTRPOS)
+           elog(ERROR, "Value is too big");
+       ptr->pos= cur - str;
+       memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+       pfree(prs->words[i].word);
+       cur += SHORTALIGN(prs->words[i].len);
+       if ( prs->words[i].alen ) {
+           WordEntryPos *wptr;
+
+           ptr->haspos=1;
+           /* position count goes right behind the padded text */
+           *(uint16*)cur = prs->words[i].pos.apos[0];
+           wptr=POSDATAPTR(in,ptr);
+           for(j=0;j<*(uint16*)cur;j++) {
+               wptr[j].weight=0;
+               wptr[j].pos=prs->words[i].pos.apos[j+1];
+           }
+           cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+           pfree(prs->words[i].pos.apos);
+       } else
+           ptr->haspos=0;
+       ptr++;
+   }
+   pfree(prs->words);
+   return in;
+}
+
+
+/*
+ * to_tsvector(cfg_id, text): parse the text (argument 1) with the
+ * configuration found for the id in argument 0 and return the resulting
+ * tsvector; text that yields no words gives an empty tsvector.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *in = PG_GETARG_TEXT_P(1);
+   PRSTEXT     prs;
+   tsvector   *out;
+   TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+
+   /* start with room for 32 words */
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+   PG_FREE_IF_COPY(in, 1);
+
+   if (prs.curwords == 0)
+   {
+       /* nothing found: hand back a header-only value */
+       pfree(prs.words);
+       out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+   }
+   else
+       out = makevalue(&prs);
+
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * to_tsvector(cfg_name, text): resolve the configuration name in
+ * argument 0 to its id and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS)
+{
+   text       *cfg = PG_GETARG_TEXT_P(0);
+   Datum       cfgid = Int32GetDatum(name2id_cfg(cfg));
+   Datum       vector;
+
+   vector = DirectFunctionCall3(to_tsvector,
+                                cfgid,
+                                PG_GETARG_DATUM(1),
+                                (Datum) 0);
+   PG_FREE_IF_COPY(cfg, 0);
+   PG_RETURN_DATUM(vector);
+}
+
+/*
+ * to_tsvector(text): delegate to to_tsvector() with the id returned by
+ * get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS)
+{
+   Datum       cfgid = Int32GetDatum(get_currcfg());
+   Datum       vector;
+
+   vector = DirectFunctionCall3(to_tsvector,
+                                cfgid,
+                                PG_GETARG_DATUM(0),
+                                (Datum) 0);
+   PG_RETURN_DATUM(vector);
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * TEXTOID.  Returns the oid of the first such candidate in the list from
+ * FuncnameGetCandidates(), or InvalidOid when there is none.  The whole
+ * candidate list is freed before returning.
+ */
+static Oid
+findFunc(char *fname)
+{
+   List       *names = makeList1(makeString(fname));
+   FuncCandidateList cand = FuncnameGetCandidates(names, 1);
+   Oid         found = InvalidOid;
+
+   freeList(names);
+
+   while (cand != NULL)
+   {
+       FuncCandidateList next = cand->next;
+
+       /* keep the first match, but still walk on to free every node */
+       if (found == InvalidOid && cand->args[0] == TEXTOID)
+           found = cand->oid;
+       pfree(cand);
+       cand = next;
+   }
+
+   return found;
+}
+
+/*
+ * Trigger
+ *
+ * tsearch2(tsvector_field, text_field1, ...): row-level BEFORE trigger
+ * for INSERT and UPDATE that fills the column named by the first trigger
+ * argument with a tsvector built from the remaining arguments.
+ *
+ * Each remaining argument is either the name of a column of the table or,
+ * if no such column exists, the name of a function taking one text
+ * argument (looked up with findFunc).  A function name is remembered and
+ * applied to the value of every column processed after it.  Columns that
+ * are not text, varchar or bpchar are skipped with a WARNING; NULL values
+ * are skipped silently.  The text is parsed with the configuration
+ * returned by get_currcfg().
+ *
+ * Returns the tuple produced by SPI_modifytuple; raises ERROR when called
+ * in any other trigger context or when a name cannot be resolved.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+   TriggerData *trigdata;
+   Trigger    *trigger;
+   Relation    rel;
+   HeapTuple   rettuple = NULL;
+   TSCfgInfo *cfg=findcfg(get_currcfg()); 
+   int         numidxattr,
+               i;
+   PRSTEXT     prs;
+   Datum       datum = (Datum) 0;
+   Oid     funcoid = InvalidOid; /* preprocessing function in effect, if any */
+
+   if (!CALLED_AS_TRIGGER(fcinfo))
+       elog(ERROR, "TSearch: Not fired by trigger manager");
+
+   trigdata = (TriggerData *) fcinfo->context;
+   if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+       elog(ERROR, "TSearch: Can't process STATEMENT events");
+   if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+       elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+   if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+       rettuple = trigdata->tg_trigtuple;
+   else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+       rettuple = trigdata->tg_newtuple;
+   else
+       elog(ERROR, "TSearch: Unknown event");
+
+   trigger = trigdata->tg_trigger;
+   rel = trigdata->tg_relation;
+
+   if (trigger->tgnargs < 2)
+       elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+   numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+   if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+       elog(ERROR, "TSearch: Can not find tsvector_field");
+
+   prs.lenwords = 32;
+   prs.curwords = 0;
+   prs.pos = 0;
+   prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+   /* collect the words of every source column into prs */
+   for (i = 1; i < trigger->tgnargs; i++)
+   {
+       int         numattr;
+       Oid         oidtype;
+       Datum       txt_toasted;
+       bool        isnull;
+       text       *txt;
+
+       numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+       if (numattr == SPI_ERROR_NOATTRIBUTE)
+       {
+           funcoid=findFunc(trigger->tgargs[i]);
+           if ( funcoid==InvalidOid )
+               elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+           continue; /* a function name; it applies to the columns that follow */
+       }
+       oidtype = SPI_gettypeid(rel->rd_att, numattr);
+       /* We assume char() and varchar() are binary-equivalent to text */
+       if (!(oidtype == TEXTOID ||
+             oidtype == VARCHAROID ||
+             oidtype == BPCHAROID))
+       {
+           elog(WARNING, "TSearch: '%s' is not of character type",
+                trigger->tgargs[i]);
+           continue;
+       }
+       txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+       if (isnull)
+           continue;
+
+       if ( funcoid!=InvalidOid ) {
+           text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+               funcoid,
+               PointerGetDatum(txt_toasted)
+           ));
+           txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+           if ( txt == txttmp ) /* no detoasted copy was made: make the pfree test below skip txt */
+               txt_toasted = PointerGetDatum(txt);
+       } else
+            txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+       parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+       if (txt != (text*)DatumGetPointer(txt_toasted) ) /* free only a detoasted copy */
+           pfree(txt);
+   }
+
+   /* build the tsvector and store it into the target column */
+   if (prs.curwords)
+   {
+       datum = PointerGetDatum(makevalue(&prs));
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+       pfree(DatumGetPointer(datum));
+   }
+   else
+   {
+       /* no words at all: store a header-only tsvector */
+       tsvector *out = palloc(CALCDATASIZE(0,0));
+       out->len = CALCDATASIZE(0,0);
+       out->size = 0;
+       datum = PointerGetDatum(out);
+       pfree(prs.words);
+       rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+                                  &datum, NULL);
+   }
+
+   if (rettuple == NULL)
+       elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+   return PointerGetDatum(rettuple);
+}
diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h

new file mode 100644 (file)

index 0000000..31e6a4b
--- /dev/null
+++ b/contrib/tsearch2/tsvector.h
@@ -0,0 +1,71 @@
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One lexeme of a tsvector, packed into 32 bits:
+ *   haspos - set when position data follows the lexeme text
+ *   len    - length of the lexeme text in bytes
+ *   pos    - offset of the lexeme text, counted from STRPTR()
+ */
+typedef struct {
+   uint32
+       haspos:1,
+       len:11, /* MAX 2Kb */
+       pos:20; /* MAX 1Mb */
+}  WordEntry;
+/* both limits are one more than the largest value the bit field can hold */
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/* One occurrence of a lexeme: 2-bit weight and 14-bit word position. */
+typedef struct {
+   uint16
+       weight:2,
+       pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS    (1<<14)
+#define MAXNUMPOS  256
+/* clamp a position to the largest value that fits WordEntryPos.pos */
+#define LIMITPOS(x)    ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * The tsvector value itself: total length in bytes, number of WordEntry
+ * items, then the WordEntry array followed by the string area (lexeme
+ * texts and position data).
+ */
+typedef struct
+{
+   int4        len;
+   int4        size;
+   char        data[1];
+}  tsvector;
+
+/*
+ * Accessors for the parts of a tsvector.  Every macro argument is
+ * parenthesised so that expressions such as "a + b" or "c ? x : y" can be
+ * passed safely.
+ *
+ *   CALCDATASIZE - total size for x entries and lenstr bytes of string area
+ *   ARRPTR       - start of the WordEntry array
+ *   STRPTR       - start of the string area
+ *   STRSIZE      - size of the string area
+ *   _POSDATAPTR  - uint16 position count stored behind the padded text of e
+ *   POSDATALEN   - number of positions of entry e (0 when haspos is unset)
+ *   POSDATAPTR   - first WordEntryPos of entry e
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x)  ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x)  ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define _POSDATAPTR(x,e)   (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) 
+#define POSDATAPTR(x,e)    ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/* Entry as collected while parsing tsvector input: the entry plus its position list. */
+typedef struct {
+   WordEntry   entry;
+   WordEntryPos    *pos;
+}  WordEntryIN;
+
+/* State of the tsvector input tokenizer used with gettoken_tsvector(). */
+typedef struct
+{
+   char       *prsbuf;
+   char       *word;
+   char       *curpos;
+   int4        len;
+   int4        state;
+   int4        alen;
+   WordEntryPos    *pos;
+   bool        oprisdelim;
+}  TI_IN_STATE;
+
+int4       gettoken_tsvector(TI_IN_STATE * state);
+
+#endif
diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c

new file mode 100644 (file)

index 0000000..3f38014
--- /dev/null
+++ b/contrib/tsearch2/tsvector_op.c
@@ -0,0 +1,264 @@
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev 
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h>             /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip);
+Datum      strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight);
+Datum      setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat);
+Datum      concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector): return a copy of the argument that keeps only the
+ * lexemes; every entry of the result has haspos = 0 and no position data
+ * is copied.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector    *out;
+   int i,len=0;
+   WordEntry *arrin=ARRPTR(in), *arrout;
+   char *cur;
+
+   /* space needed for the SHORTALIGN-padded lexeme texts alone */
+   for(i=0;i<in->size;i++)
+       len += SHORTALIGN( arrin[i].len );
+
+   len = CALCDATASIZE(in->size, len);
+   out=(tsvector*)palloc(len);
+   memset(out,0,len);
+   out->len=len;
+   out->size=in->size;
+   arrout=ARRPTR(out);
+   cur=STRPTR(out);
+   /* copy each lexeme text and rebuild its entry without positions */
+   for(i=0;i<in->size;i++) {
+       memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+       arrout[i].haspos = 0;
+       arrout[i].len = arrin[i].len;
+       arrout[i].pos = cur - STRPTR(out);
+       cur += SHORTALIGN( arrout[i].len );
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char"): return a copy of the tsvector in which
+ * every position carries the given weight.  The weight letter is taken
+ * case-insensitively: a -> 3, b -> 2, c -> 1, d -> 0; any other character
+ * raises ERROR.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+   tsvector       *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   char       cw = PG_GETARG_CHAR(1);
+   tsvector    *out;
+   int i,j;
+   WordEntry *entry;
+   WordEntryPos *p;
+   int w=0;
+
+   /*
+    * tolower() is only defined for EOF and values representable as
+    * unsigned char, so a plain char must be converted before the call.
+    */
+   switch(tolower((unsigned char) cw)) {
+       case 'a': w=3; break;
+       case 'b': w=2; break;
+       case 'c': w=1; break;
+       case 'd': w=0; break;
+       default: elog(ERROR,"Unknown weight");
+   }
+
+   out=(tsvector*)palloc(in->len);
+   memcpy(out,in,in->len);
+   entry=ARRPTR(out);
+   i=out->size;
+   /* overwrite the weight of every position of every entry */
+   while(i--) {
+       if ( (j=POSDATALEN(out,entry)) != 0 ) {
+           p=POSDATAPTR(out,entry);
+           while(j--) {
+               p->weight=w;
+               p++;
+           }
+       }
+       entry++;
+   }
+
+   PG_FREE_IF_COPY(in, 0);
+   PG_RETURN_POINTER(out);
+}
+
+/*
+ * Order two entries that may live in different tsvectors: the shorter
+ * lexeme sorts first, lexemes of equal length are compared with strncmp.
+ * ptra and ptrb are the string areas that a->pos and b->pos refer to.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+   if (a->len != b->len)
+       return (a->len > b->len) ? 1 : -1;
+
+   return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of entry srcptr (in src) to the position list of
+ * entry destptr (in dest), adding maxpos to each position and clamping
+ * the sum with LIMITPOS.  Weights are copied unchanged.
+ *
+ * The destination list is the one _POSDATAPTR() finds behind the lexeme
+ * text of destptr; when destptr has no positions yet its count is reset
+ * to 0 first.  Copying stops when the source is exhausted, when the
+ * destination holds MAXNUMPOS positions, or when the last stored position
+ * is already MAXENTRYPOS-1.
+ *
+ * Sets destptr->haspos when at least one position was appended and
+ * returns the number of positions appended.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+   uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+   int i;
+   uint16 slen = POSDATALEN(src, srcptr), startlen;
+   WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+   if ( ! destptr->haspos ) 
+       *clen=0;
+
+   startlen = *clen;
+   for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+       dpos[ *clen ].weight = spos[i].weight; 
+       dpos[ *clen ].pos    = LIMITPOS(spos[i].pos + maxpos);
+       (*clen)++;
+   }
+
+   if ( *clen != startlen )
+       destptr->haspos=1; 
+   return  *clen - startlen;
+}
+
+/*
+ * concat(tsvector, tsvector): merge the entries of both arguments into a
+ * new tsvector.
+ *
+ * The two entry arrays are walked in parallel in compareEntry() order
+ * (presumably both inputs are already sorted that way -- confirm against
+ * tsvector_in/makevalue).  An entry present in only one input is copied;
+ * an entry present in both is emitted once with the positions of in1
+ * followed by those of in2.  Positions taken from in2 are passed through
+ * add_pos() with maxpos, the largest position found in in1.
+ *
+ * The result is allocated with in1->len + in2->len bytes as an upper
+ * bound; size and len are corrected at the end and the string area is
+ * moved down when fewer entries than in1->size + in2->size were written.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+   tsvector       *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+   tsvector       *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+   tsvector       *out;
+   WordEntry *ptr;
+   WordEntry *ptr1,*ptr2;
+   WordEntryPos *p;
+   int maxpos=0,i,j,i1,i2;
+   char *cur;
+   char *data,*data1,*data2;
+
+   ptr=ARRPTR(in1);
+   i=in1->size;
+   while(i--) { /* find the largest position used in in1 */
+       if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+           p=POSDATAPTR(in1,ptr);
+           while(j--) {
+               if ( p->pos > maxpos ) 
+                   maxpos = p->pos;
+               p++;
+           }
+       }
+       ptr++;
+   }
+   /* set up the output with worst-case sizes; STRPTR(out) depends on out->size */
+   ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+   data1=STRPTR(in1); data2=STRPTR(in2);
+   i1=in1->size;   i2=in2->size;
+   out=(tsvector*)palloc( in1->len + in2->len );
+   memset(out,0,in1->len + in2->len);
+   out->len = in1->len + in2->len;
+   out->size = in1->size + in2->size;
+   data=cur=STRPTR(out);
+   ptr=ARRPTR(out);
+   while( i1 && i2 ) {
+       int cmp=compareEntry(data1,ptr1,data2,ptr2);
+       if ( cmp < 0 ) { /* in1 first */
+           ptr->haspos = ptr1->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) { /* positions of in1 are copied verbatim, count included */
+               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+               cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+           }
+           ptr++; ptr1++; i1--;
+       } else if ( cmp>0 ) { /* in2 first */ 
+           ptr->haspos = ptr2->haspos;
+           ptr->len = ptr2->len;
+           memcpy( cur, data2 + ptr2->pos, ptr2->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr2->len);
+           if ( ptr->haspos ) { /* positions of in2 are shifted by maxpos */
+               int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+               if ( addlen == 0 )
+                   ptr->haspos=0;
+               else
+                   cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+           }
+           ptr++; ptr2++; i2--;
+       } else { /* same lexeme in both inputs: emit it once */
+           ptr->haspos = ptr1->haspos | ptr2->haspos;
+           ptr->len = ptr1->len;
+           memcpy( cur, data1 + ptr1->pos, ptr1->len );
+           ptr->pos = cur - data; 
+           cur+=SHORTALIGN(ptr1->len);
+           if ( ptr->haspos ) {
+               if ( ptr1->haspos ) {
+                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+                   cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+                   if ( ptr2->haspos ) /* the count word was already copied from in1 */
+                       cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+               } else if ( ptr2->haspos ) {
+                   int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+                   if ( addlen == 0 )
+                       ptr->haspos=0;
+                   else
+                       cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+               }
+           }
+           ptr++; ptr1++; ptr2++; i1--; i2--;
+       }
+   }
+
+   while(i1) { /* entries left over in in1 */
+       ptr->haspos = ptr1->haspos;
+       ptr->len = ptr1->len;
+       memcpy( cur, data1 + ptr1->pos, ptr1->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr1->len);
+       if ( ptr->haspos ) {
+           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+           cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+       }
+       ptr++; ptr1++; i1--;
+   }
+
+   while(i2) { /* entries left over in in2 */
+       ptr->haspos = ptr2->haspos;
+       ptr->len = ptr2->len;
+       memcpy( cur, data2 + ptr2->pos, ptr2->len );
+       ptr->pos = cur - data; 
+       cur+=SHORTALIGN(ptr2->len);
+       if ( ptr->haspos ) {
+           int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+           if ( addlen == 0 )
+               ptr->haspos=0;
+           else
+               cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); 
+       }
+       ptr++; ptr2++; i2--;
+   }
+   /* shrink to the real entry count; STRPTR(out) moves when size changes */
+   out->size=ptr-ARRPTR(out);
+   out->len = CALCDATASIZE( out->size, cur-data );
+   if ( data != STRPTR(out) )
+       memmove( STRPTR(out), data, cur-data );
+
+   PG_FREE_IF_COPY(in1, 0);
+   PG_FREE_IF_COPY(in2, 1);
+   PG_RETURN_POINTER(out);
+}
+
diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in

new file mode 100644 (file)

index 0000000..a4fe145
--- /dev/null
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -0,0 +1,62 @@
+BEGIN;
+
+-- Be careful!
+-- This script drops all indexes, triggers and columns that use the types
+-- defined in tsearch2.sql.
+
+-- GiST operator class for tsvector
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+-- operators on tsvector and tsquery
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+-- pg_ts_* tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+-- data types; CASCADE also removes the objects that depend on them
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+-- functions whose signatures use none of the types dropped above
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END;
diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c

new file mode 100644 (file)

index 0000000..ea596c5
--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -0,0 +1,56 @@
+#include "deflex.h"
+/*
+ * Human-readable description of each token type.  The entries line up
+ * with the token-type codes of deflex.h (1 = LATWORD ... 23 = HTMLENTITY);
+ * slot 0 is an empty placeholder, as deflex.h defines no code 0.
+ */
+const char *lex_descr[]={
+   "",
+   "Latin word",
+   "Non-latin word",
+   "Word",
+   "Email",
+   "URL",
+   "Host",
+   "Scientific notation",
+   "VERSION",
+   "Part of hyphenated word",
+   "Non-latin part of hyphenated word",
+   "Latin part of hyphenated word",
+   "Space symbols",
+   "HTML Tag",
+   "HTTP head",
+   "Hyphenated word",
+   "Latin hyphenated word",
+   "Non-latin hyphenated word",
+   "URI",
+   "File or path name",
+   "Decimal notation",
+   "Signed integer",
+   "Unsigned integer",
+   "HTML Entity"
+};
+/*
+ * Short alias of each token type, in the same order as lex_descr[]: one
+ * entry per token-type code of deflex.h, with an empty slot 0.
+ */
+const char *tok_alias[]={
+   "",
+   "lword",
+   "nlword",
+   "word",
+   "email",
+   "url",
+   "host",
+   "sfloat",
+   "version",
+   "part_hword",
+   "nlpart_hword",
+   "lpart_hword",
+   "blank",
+   "tag",
+   "http",
+   "hword",
+   "lhword",
+   "nlhword",
+   "uri",
+   "file",
+   "float",
+   "int",
+   "uint",
+   "entity"
+};
+};
+
diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h

new file mode 100644 (file)

index 0000000..651d1f9
--- /dev/null
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -0,0 +1,34 @@
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/* remember: LASTNUM is presumably meant to stay equal to the highest token-type code below */
+#define LASTNUM        23
+/* token-type codes; lex_descr[] and tok_alias[] in deflex.c hold one entry per code */
+#define LATWORD        1
+#define CYRWORD        2
+#define UWORD      3
+#define EMAIL      4
+#define FURL       5
+#define HOST       6
+#define SCIENTIFIC 7
+#define VERSIONNUMBER  8
+#define PARTHYPHENWORD 9
+#define CYRPARTHYPHENWORD  10
+#define LATPARTHYPHENWORD  11
+#define SPACE      12
+#define TAG            13
+#define HTTP       14
+#define HYPHENWORD 15
+#define LATHYPHENWORD  16
+#define CYRHYPHENWORD  17
+#define URI        18
+#define FILEPATH   19
+#define DECIMAL        20
+#define SIGNEDINT  21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+extern const char *lex_descr[];
+extern const char *tok_alias[];
+
+#endif
diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h

new file mode 100644 (file)

index 0000000..55cf005
--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -0,0 +1,11 @@
+#ifndef __PARSER_H__
+#define __PARSER_H__
+/* Interface of the flex-generated word parser (scanner prefix tsearch2_yy). */
+char      *token; /* NOTE(review): not extern, and parser.l also defines token -- each includer gets a tentative definition; confirm this links with -fno-common */
+int            tokenlen; /* NOTE(review): likewise not extern; no other definition is visible here -- confirm before changing */
+int            tsearch2_yylex(void);
+void       start_parse_str(char *, int);
+void       start_parse_fh(FILE *, int);
+void       end_parse(void);
+
+#endif
diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l

new file mode 100644 (file)

index 0000000..49824f5
--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.l
@@ -0,0 +1,346 @@
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/* Avoid exit() on fatal scanner errors */
+#define fprintf(file, fmt, msg)  ts_error(ERROR, fmt, msg)
+
+/* postgres allocation function */
+#define free    pfree
+#define malloc  palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup  pstrdup
+
+char *token = NULL;  /* text of the token most recently returned by the scanner */
+char *s     = NULL;  /* strdup'ed copy of a whole hyphenated word or URL; freed when the next one is seen or in end_parse() */
+
+YY_BUFFER_STATE buf = NULL; /* flex buffer currently being scanned, created from a string or from a file handle */
+
+int lrlimit = -1;  /* bytes still allowed to be read from the file handle ( -1 means unlimited ) */
+int bytestoread = 0;   /* size of the next fread() issued by YY_INPUT */
+
+/*
+ * Replacement for flex's default YY_INPUT that can cap how many bytes are
+ * read from tsearch2_yyin.  The interactive branch reads one line at a
+ * time.  Otherwise lrlimit is the number of bytes still allowed:
+ * 0 reports end of input, a positive value is consumed by each read, and
+ * a negative value means no cap.
+ */
+#define YY_INPUT(buf,result,max_size) \
+   if ( yy_current_buffer->yy_is_interactive ) { \
+       int c = '*', n; \
+       for ( n = 0; n < max_size && \
+                    (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+           buf[n] = (char) c; \
+       if ( c == '\n' ) \
+           buf[n++] = (char) c; \
+       if ( c == EOF && ferror( tsearch2_yyin ) ) \
+           YY_FATAL_ERROR( "input in flex scanner failed" ); \
+       result = n; \
+   } else if ( lrlimit == 0 ) { \
+       result = YY_NULL; \
+   } else { \
+       if ( lrlimit > 0 ) { \
+           bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+           lrlimit -= bytestoread; \
+       } else { \
+           bytestoread = max_size; \
+       } \
+       result = fread( buf, 1, bytestoread, tsearch2_yyin ); \
+       if ( result == 0 && ferror( tsearch2_yyin ) ) \
+           YY_FATAL_ERROR( "input in flex scanner failed" ); \
+   }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* parser's state for parsing hyphenated-word */
+%x DELIM  
+/* parser's state for parsing URL*/
+%x URL  
+%x SERVER  
+
+/* parser's state for parsing TAGS */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* cyrillic koi8 char */
+CYRALNUM   [0-9\200-\377]
+CYRALPHA   [\200-\377]
+ALPHA      [a-zA-Z\200-\377]
+ALNUM      [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME   ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI        [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
+
+"" {
+   BEGIN INITIAL; 
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+""   { 
+   BEGIN INITIAL;
+   *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+
+"<"[\![:alpha:]]   { BEGIN INTAG; }
+
+"
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/* Release the scanner state: the saved compound-token copy and the flex buffer. */
+void end_parse() {
+   if (s != NULL) {
+       free(s);
+       s = NULL;
+   }
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+}
+
+/* Start scanning `limit` bytes at `str`, closing any scan still in progress. */
+void start_parse_str(char* str, int limit) {
+   if (buf != NULL)
+       end_parse();
+
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/*
+ * Start scanning from file handle `fh`, closing any scan still in progress.
+ * A limit of 0 means read without a cap (lrlimit = -1); any other value is
+ * stored in lrlimit for YY_INPUT to consume.
+ */
+void start_parse_fh( FILE* fh, int limit ) {
+   if (buf != NULL)
+       end_parse();
+
+   lrlimit = ( limit == 0 ) ? -1 : limit;
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+
+
+"\""    { BEGIN QINTAG; }
+
+"\\\"" ;
+
+"\""   { BEGIN INTAG; }
+
+">" { 
+   BEGIN INITIAL;
+   token = tsearch2_yytext;
+   *tsearch2_yytext=' '; 
+   token = tsearch2_yytext;
+   tokenlen = 1;
+   return TAG;
+}
+
+.|\n  ;
+
+\&(quot|amp|nbsp|lt|gt)\;   {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+
+\&\#[0-9][0-9]?[0-9]?\; {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTMLENTITY;
+}
+ 
+[-_\.[:alnum:]]+@{HOSTNAME}  /* Emails */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return EMAIL; 
+}
+
+[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+  /* float */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SCIENTIFIC; 
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+[+-]?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+[+-][0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return SIGNEDINT; 
+}
+
+[0-9]+ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UNSIGNEDINT; 
+}
+
+http"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+ftp"://"        { 
+   BEGIN URL; 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return HTTP;
+}
+
+{HOSTNAME}[/:]{URI} { 
+   BEGIN SERVER;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext ); 
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 ); 
+   token = s;
+   return FURL;
+}
+
+{HOSTNAME} {
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return HOST;
+}
+
+[/:]{URI}  {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return URI;
+}
+
+[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return FILEPATH;
+}
+
+({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */    {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return CYRHYPHENWORD;
+}
+
+([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */  {
+    BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return LATHYPHENWORD;
+}
+
+({ALNUM}+-)+{ALNUM}+ /* composite-word */  {
+   BEGIN DELIM;
+   if (s) { free(s); s=NULL; } 
+   s = strdup( tsearch2_yytext );
+   tokenlen = tsearch2_yyleng;
+   yyless( 0 );
+   token = s;
+   return HYPHENWORD;
+}
+
+[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return VERSIONNUMBER;
+}
+
+\+?[0-9]+\.[0-9]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return DECIMAL;
+}
+
+{CYRALPHA}+  /* one word in composite-word */   { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRPARTHYPHENWORD; 
+}
+
+[[:alpha:]]+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATPARTHYPHENWORD; 
+}
+
+{ALNUM}+  /* one word in composite-word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return PARTHYPHENWORD; 
+}
+
+-  { 
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+.|\n /* return in basic state */ {
+   BEGIN INITIAL;
+   yyless( 0 );
+}
+
+{CYRALPHA}+ /* normal word */  { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return CYRWORD; 
+}
+
+[[:alpha:]]+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return LATWORD; 
+}
+
+{ALNUM}+ /* normal word */ { 
+   token = tsearch2_yytext; 
+   tokenlen = tsearch2_yyleng;
+   return UWORD; 
+}
+
+[ \r\n\t]+ {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+}
+
+. {
+   token = tsearch2_yytext;
+   tokenlen = tsearch2_yyleng;
+   return SPACE;
+} 
+
+%%
+
+/* clearing after parsing from string */
+void end_parse() {
+   if (s) { free(s); s=NULL; } 
+   tsearch2_yy_delete_buffer( buf );
+   buf = NULL;
+} 
+
+/* start parse from string */
+void start_parse_str(char* str, int limit) {
+   if (buf) end_parse();
+   buf = tsearch2_yy_scan_bytes( str, limit );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+/* start parse from filehandle */
+void start_parse_fh( FILE* fh, int limit ) {
+   if (buf) end_parse();
+   lrlimit = ( limit ) ? limit : -1;
+   buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+   tsearch2_yy_switch_to_buffer( buf );
+   BEGIN INITIAL;
+}
+
+
diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c

new file mode 100644 (file)

index 0000000..deff94c
--- /dev/null
+++ b/contrib/tsearch2/wparser.c
@@ -0,0 +1,529 @@
+/* 
+ * interface functions to parser 
+ * Teodor Sigaev 
+ */
+#include 
+#include 
+#include 
+#include 
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;  /* saved SPI plan for the lookup in init_prs() */
+static Oid current_parser_id=InvalidOid;   /* parser used by the *_current functions; InvalidOid until one is chosen */
+
+/*
+ * Load the parser whose pg_ts_parser row has oid `id` into *prs.
+ *
+ * *prs is zeroed first.  The start, nexttoken, end and headline functions
+ * are then resolved with fmgr_info_cxt() using TopMemoryContext, and the
+ * lextype function oid is stored as-is.  An error is reported through
+ * ts_error() when the query cannot be prepared or run, or when no row
+ * matches.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+   Oid arg[1]={ OIDOID };
+   bool isnull;
+   Datum pars[1]={ ObjectIdGetDatum(id) };
+   int stat;
+
+   memset(prs,0,sizeof(WParserInfo));
+   SPI_connect();
+   /* prepare the lookup once and keep the saved plan for later calls */
+   if ( !plan_getparser ) {
+       plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+       if ( !plan_getparser ) 
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   stat = SPI_execp(plan_getparser, pars, " ", 1);
+   if ( stat < 0 )
+       ts_error (ERROR, "SPI_execp return %d", stat);
+   if ( SPI_processed > 0 ) {
+       Oid oid=InvalidOid;
+       /* columns 1..5 follow the order of the select list above */
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+       fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+       fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+       fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+       prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+       oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+       fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+       prs->prs_id=id;
+   } else 
+       /* Oid is unsigned, so print it with %u rather than %d */
+       ts_error(ERROR, "No parser with id %u", id);
+   SPI_finish();
+}
+
+typedef struct {
+   WParserInfo *last_prs;  /* entry returned by the most recent findprs() lookup */
+   int     len;    /* number of entries of list in use */
+   int     reallen;    /* number of entries allocated for list */
+   WParserInfo *list;  /* realloc'ed array, kept sorted by prs_id */
+   SNMap       name2id_map;    /* parser name -> oid cache filled by name2id_prs() */
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}}; /* cache of the parsers loaded so far */
+
+/* Throw away the cached parser list and the name->oid map, leaving PList zeroed. */
+void
+reset_prs(void) {
+   freeSNMap( &(PList.name2id_map) );
+
+   if ( PList.list != NULL )
+       free(PList.list);
+
+   memset(&PList, 0, sizeof(PrsList));
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ *
+ * The ids are compared explicitly rather than subtracted.  Oid is
+ * unsigned, so a difference truncated to int can carry the wrong sign
+ * when the two ids are far apart, which would break the ordering.
+ */
+static int
+compareprs(const void *a, const void *b) {
+   Oid ida = ((const WParserInfo*)a)->prs_id;
+   Oid idb = ((const WParserInfo*)b)->prs_id;
+
+   if ( ida == idb )
+       return 0;
+   return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * Return the cached WParserInfo for parser `id`, loading it with
+ * init_prs() on first use.  The returned pointer refers to an element of
+ * PList.list, which is realloc'ed and re-sorted whenever a parser is added.
+ */
+WParserInfo *
+findprs(Oid id) {
+   /* fast path: same parser as the previous lookup */
+   if ( PList.last_prs != NULL && PList.last_prs->prs_id == id )
+       return PList.last_prs;
+
+   /* binary search among the parsers loaded so far */
+   if ( PList.len != 0 ) {
+       WParserInfo probe;
+
+       probe.prs_id = id;
+       PList.last_prs = bsearch(&probe, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+       if ( PList.last_prs != NULL )
+           return PList.last_prs;
+   }
+
+   /* not loaded yet: grow the array if it is full (16 slots, then doubling) */
+   if ( PList.len == PList.reallen ) {
+       int newlen = ( PList.reallen == 0 ) ? 16 : 2*PList.reallen;
+       WParserInfo *grown = (WParserInfo*)realloc(PList.list, sizeof(WParserInfo)*newlen);
+
+       if ( grown == NULL )
+           ts_error(ERROR,"No memory");
+       PList.reallen = newlen;
+       PList.list = grown;
+   }
+
+   /* load into the first free slot, then restore the sort order */
+   PList.last_prs = &(PList.list[PList.len]);
+   init_prs(id, PList.last_prs);
+   PList.len++;
+   qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+
+   /* sorting may have moved the new entry, so look it up again */
+   return findprs(id);
+}
+
+static void *plan_name2id=NULL;    /* saved SPI plan for the lookup in name2id_prs() */
+
+/*
+ * Translate a parser name into the oid of its pg_ts_parser row.
+ * Answers are cached in PList.name2id_map; an unknown name is reported
+ * through ts_error().
+ */
+Oid
+name2id_prs(text *name) {
+   Oid argtypes[1]={ TEXTOID };
+   Datum params[1]={ PointerGetDatum(name) };
+   bool isnull;
+   int rc;
+   Oid prsoid=findSNMap_t( &(PList.name2id_map), name );
+
+   /* a non-zero value from the map is a cached answer */
+   if ( prsoid != 0 )
+       return prsoid;
+
+   SPI_connect();
+   /* prepare the lookup once and keep the saved plan for later calls */
+   if ( plan_name2id == NULL ) {
+       plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+       if ( plan_name2id == NULL )
+           ts_error(ERROR, "SPI_prepare() failed");
+   }
+
+   rc = SPI_execp(plan_name2id, params, " ", 1);
+   if ( rc < 0 )
+       ts_error (ERROR, "SPI_execp return %d", rc);
+
+   if ( SPI_processed == 0 )
+       ts_error(ERROR, "No parser '%s'", text2char(name));
+   else
+       prsoid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+   SPI_finish();
+
+   /* remember the answer for the next call */
+   addSNMap_t( &(PList.name2id_map), name, prsoid );
+   return prsoid;
+}
+
+
+/******sql-level interface******/
+typedef struct {
+   int     cur;    /* index of the next list entry to return */
+   LexDescr    *list;  /* array returned by the parser's lextype function; ends at the entry with lexid == 0 */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type* functions: ask parser `prsid`
+ * for its list of token types and stash it, together with the tuple
+ * machinery for relation "tokentype", in the multi-call context.
+ */
+static void
+setup_firstcall(FuncCallContext  *funcctx, Oid prsid) {
+   WParserInfo *prs = findprs(prsid);
+   MemoryContext oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+   TypeStorage *st = (TypeStorage*)palloc( sizeof(TypeStorage) );
+   TupleDesc tupdesc;
+
+   /* everything from here on must survive across the per-row calls */
+   st->cur = 0;
+   st->list = (LexDescr*)DatumGetPointer(
+       OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
+   );
+   funcctx->user_fctx = (void*)st;
+
+   tupdesc = RelationNameGetTupleDesc("tokentype");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the token_type* functions.  Returns the next
+ * (id, alias, description) row as a Datum, or (Datum)0 once the list is
+ * exhausted, at which point the stored state is freed.
+ */
+static Datum
+process_call(FuncCallContext  *funcctx) {
+   TypeStorage *st = (TypeStorage*)funcctx->user_fctx;
+   LexDescr    *cur;
+   char    *values[3];
+   char    txtid[16];
+   HeapTuple   tuple;
+   Datum   result;
+
+   /* no list, or reached the terminating entry: clean up and stop */
+   if ( st->list == NULL || st->list[st->cur].lexid == 0 ) {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   cur = &(st->list[st->cur]);
+   sprintf(txtid,"%d",cur->lexid);
+   values[0] = txtid;
+   values[1] = cur->alias;
+   values[2] = cur->descr;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   /* the strings are no longer needed once the tuple has been built */
+   pfree(values[1]);
+   pfree(values[2]);
+   st->cur++;
+   return result;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/* Set-returning function: token types of the parser whose oid is argument 0. */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, PG_GETARG_OID(0) );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call(funcctx);
+
+   /* (Datum)0 from process_call() means there are no more rows */
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+
+/* Set-returning function: token types of the parser named by argument 0. */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       setup_firstcall(funcctx, name2id_prs( name ) );
+       PG_FREE_IF_COPY(name,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call(funcctx);
+
+   /* (Datum)0 from process_call() means there are no more rows */
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: token types of the current parser.  When no
+ * parser has been chosen yet, the one named "default" becomes current.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid )
+           current_parser_id = name2id_prs( char2text("default") );
+       setup_firstcall(funcctx, current_parser_id );
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = process_call(funcctx);
+
+   /* (Datum)0 from process_call() means there are no more rows */
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+
+/* Make the parser whose oid is argument 0 the current parser. */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+        Oid prsid = PG_GETARG_OID(0);
+
+        /*
+         * Load the parser first; presumably the error raised for an unknown
+         * id does not return, so the current parser is only changed for a
+         * valid one.
+         */
+        findprs(prsid);
+        current_parser_id = prsid;
+        PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+
+/* Make the parser named by argument 0 the current parser. */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+        text *name = PG_GETARG_TEXT_P(0);
+        Datum prsid = ObjectIdGetDatum( name2id_prs(name) );
+
+        /* delegate to the by-oid variant */
+        DirectFunctionCall1(set_curprs, prsid);
+
+        PG_FREE_IF_COPY(name, 0);
+        PG_RETURN_VOID();
+}
+
+typedef struct {
+   int type;   /* token type id returned by the parser */
+   char    *lexem; /* palloc'ed, NUL-terminated copy of the token text */
+} LexemEntry;
+
+typedef struct {
+   int cur;    /* next entry to fill while parsing, then next entry to return */
+   int len;    /* allocated entries while parsing, number of tokens afterwards */
+   LexemEntry  *list;  /* the collected tokens */
+} PrsStorage;
+   
+
+/*
+ * First-call setup shared by the parse* functions: run parser `prsid`
+ * over the whole of `txt`, collect every token in the multi-call context,
+ * and prepare the tuple machinery for relation "tokenout".
+ *
+ * prsid is an Oid, matching findprs() and setup_firstcall(); every caller
+ * passes an Oid.
+ */
+static void
+prs_setup_firstcall(FuncCallContext  *funcctx, Oid prsid, text *txt) {
+   TupleDesc            tupdesc;
+   MemoryContext     oldcontext;
+   PrsStorage  *st;
+   WParserInfo *prs = findprs(prsid); 
+   char    *lex=NULL;
+   int     llen=0, type=0; 
+
+   oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+   st=(PrsStorage*)palloc( sizeof(PrsStorage) );
+   st->cur=0;
+   st->len=16;
+   st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
+
+   /* start the parser on the text payload, excluding the varlena header */
+   prs->prs = (void*)DatumGetPointer(
+       FunctionCall2(
+           &(prs->start_info),
+           PointerGetDatum(VARDATA(txt)),
+           Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+       )
+   );
+
+   /* pull tokens until the parser returns type 0 */
+   while( ( type=DatumGetInt32(FunctionCall3(
+           &(prs->getlexeme_info),
+           PointerGetDatum(prs->prs),
+           PointerGetDatum(&lex),
+           PointerGetDatum(&llen))) ) != 0 ) {
+
+       /* double the array when it is full */
+       if ( st->cur>=st->len ) {
+           st->len=2*st->len;
+           st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
+       }
+       /* keep a NUL-terminated copy: lex is only known to cover llen bytes */
+       st->list[st->cur].lexem = palloc(llen+1);
+       memcpy( st->list[st->cur].lexem, lex, llen);
+       st->list[st->cur].lexem[llen]='\0';
+       st->list[st->cur].type=type;
+       st->cur++;
+   }
+       
+   FunctionCall1(
+       &(prs->end_info),
+       PointerGetDatum(prs->prs)
+   );
+
+   /* switch from "filling" to "emitting": len = token count, cur = 0 */
+   st->len=st->cur;
+   st->cur=0;
+   
+   funcctx->user_fctx = (void*)st;
+   tupdesc = RelationNameGetTupleDesc("tokenout");
+   funcctx->slot = TupleDescGetSlot(tupdesc);
+   funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Per-call worker shared by the parse* functions.  Returns the next
+ * (type, token) row as a Datum, or (Datum)0 once every collected token
+ * has been returned, at which point the stored state is freed.
+ */
+static Datum
+prs_process_call(FuncCallContext  *funcctx) {
+   PrsStorage  *st = (PrsStorage*)funcctx->user_fctx;
+   LexemEntry  *entry;
+   char    *values[2];
+   char    tid[16];
+   HeapTuple   tuple;
+   Datum   result;
+
+   /* all tokens emitted: clean up and stop */
+   if ( st->cur >= st->len ) {
+       if ( st->list ) pfree(st->list);
+       pfree(st);
+       return (Datum)0;
+   }
+
+   entry = &(st->list[st->cur]);
+   sprintf(tid,"%d",entry->type);
+   values[0] = tid;
+   values[1] = entry->lexem;
+
+   tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+   result = TupleGetDatum(funcctx->slot, tuple);
+
+   /* the token copy is no longer needed once the tuple has been built */
+   pfree(values[1]);
+   st->cur++;
+   return result;
+}
+
+           
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+
+/* Set-returning function: tokens of text (argument 1) using the parser whose oid is argument 0. */
+Datum
+parse(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(funcctx);
+
+   /* (Datum)0 from prs_process_call() means there are no more rows */
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+
+/* Set-returning function: tokens of text (argument 1) using the parser named by argument 0. */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *name = PG_GETARG_TEXT_P(0);
+       text *txt = PG_GETARG_TEXT_P(1);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
+       PG_FREE_IF_COPY(name,0);
+       PG_FREE_IF_COPY(txt,1);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(funcctx);
+
+   /* (Datum)0 from prs_process_call() means there are no more rows */
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+
+/*
+ * Set-returning function: tokens of text (argument 0) using the current
+ * parser.  When no parser has been chosen yet, the one named "default"
+ * becomes current.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+   FuncCallContext  *funcctx;
+   Datum row;
+
+   if (SRF_IS_FIRSTCALL()) {
+       text *txt = PG_GETARG_TEXT_P(0);
+
+       funcctx = SRF_FIRSTCALL_INIT();
+       if ( current_parser_id == InvalidOid )
+           current_parser_id = name2id_prs( char2text("default") );
+       prs_setup_firstcall(funcctx, current_parser_id,txt );
+       PG_FREE_IF_COPY(txt,0);
+   }
+
+   funcctx = SRF_PERCALL_SETUP();
+   row = prs_process_call(funcctx);
+
+   /* (Datum)0 from prs_process_call() means there are no more rows */
+   if ( row == (Datum)0 ) {
+       SRF_RETURN_DONE(funcctx);
+   }
+   SRF_RETURN_NEXT(funcctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+
+/*
+ * Build a headline for a document.
+ * Arguments: 0 = configuration oid, 1 = document text, 2 = query,
+ * 3 = optional options text (may be absent or a NULL pointer).
+ * The document is parsed with hlparsetext(), the parser's headline
+ * function marks up the words, and genhl() produces the returned text.
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+   TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
+   text       *in = PG_GETARG_TEXT_P(1);
+   QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+   text       *opt = NULL;
+   WParserInfo *prsobj;
+   HLPRSTEXT   prs;
+   text *out;
+
+   /* the options argument is used only when present and non-NULL */
+   if ( PG_NARGS()>3 && PG_GETARG_POINTER(3) )
+       opt = PG_GETARG_TEXT_P(3);
+   prsobj = findprs(cfg->prs_id);
+
+   /* start with room for 32 words */
+   memset(&prs,0,sizeof(HLPRSTEXT));
+   prs.lenwords = 32;
+   prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+   hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+   /* let the parser's headline function work on the parsed words */
+   FunctionCall3(
+       &(prsobj->headline_info),
+       PointerGetDatum(&prs),
+       PointerGetDatum(opt),
+       PointerGetDatum(query)
+   );
+
+   out = genhl(&prs);
+
+   PG_FREE_IF_COPY(in,1);
+   PG_FREE_IF_COPY(query,2);
+   if ( opt ) {
+       PG_FREE_IF_COPY(opt,3);
+   }
+   pfree(prs.words);
+   pfree(prs.startsel);
+   pfree(prs.stopsel);
+
+   PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+
+/*
+ * headline() with the configuration given by name (argument 0).  The
+ * remaining arguments are passed through; a missing options argument is
+ * passed on as a NULL pointer.
+ */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+   text *cfg = PG_GETARG_TEXT_P(0);
+   Datum opt = ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL);
+   Datum out;
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(name2id_cfg( cfg ) ),
+       PG_GETARG_DATUM(1),
+       PG_GETARG_DATUM(2),
+       opt
+   );
+
+   PG_FREE_IF_COPY(cfg,0);
+   PG_RETURN_DATUM(out);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+
+/*
+ * headline() using the configuration returned by get_currcfg().
+ * Arguments: 0 = document text, 1 = query, 2 = optional options text,
+ * passed on as a NULL pointer when absent.
+ */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+   Datum opt = ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL);
+   Datum out;
+
+   out = DirectFunctionCall4(
+       headline,
+       ObjectIdGetDatum(get_currcfg()),
+       PG_GETARG_DATUM(0),
+       PG_GETARG_DATUM(1),
+       opt
+   );
+
+   PG_RETURN_DATUM(out);
+}
+
+
+
diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h

new file mode 100644 (file)

index 0000000..a8afc56
--- /dev/null
+++ b/contrib/tsearch2/wparser.h
@@ -0,0 +1,28 @@
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+typedef struct {
+   Oid prs_id; /* oid of the parser's pg_ts_parser row */
+   FmgrInfo start_info;    /* function from column prs_start */
+   FmgrInfo getlexeme_info;    /* function from column prs_nexttoken */
+   FmgrInfo end_info;  /* function from column prs_end */
+   FmgrInfo headline_info; /* function from column prs_headline */
+   Oid lextype;    /* oid from column prs_lextype, called by oid rather than through FmgrInfo */
+   void *prs;  /* value returned by the start function, handed back to the other parser functions */
+} WParserInfo;
+
+void init_prs(Oid id, WParserInfo *prs);
+WParserInfo* findprs(Oid id);
+Oid name2id_prs(text *name);
+void   reset_prs(void);
+
+
+typedef struct {
+   int lexid;  /* token type id; 0 marks the end of a LexDescr array */
+   char    *alias; /* short name of the token type */
+   char    *descr; /* description of the token type */
+} LexDescr;
+
+#endif
diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c

new file mode 100644 (file)

index 0000000..eec8b03
--- /dev/null
+++ b/contrib/tsearch2/wparser_def.c
@@ -0,0 +1,291 @@
+/* 
+ * default word parser 
+ * Teodor Sigaev 
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+/*
+ * prsd_lextype: describe the lexeme types of the default parser.
+ *
+ * Returns a palloc'd array of LASTNUM+1 LexDescr entries: one for every
+ * type id 1..LASTNUM (alias copied from tok_alias[], description copied
+ * from lex_descr[]) followed by a terminating entry whose lexid is 0.
+ */
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+Datum 
+prsd_lextype(PG_FUNCTION_ARGS) {
+   LexDescr *descr=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
+   int i;
+
+   /* type ids are 1-based, array slots are 0-based */
+   for(i=1;i<=LASTNUM;i++) {
+       descr[i-1].lexid = i;
+       descr[i-1].alias = pstrdup(tok_alias[i]);
+       descr[i-1].descr = pstrdup(lex_descr[i]);
+   }
+
+   /*
+    * Terminator.  palloc() does not zero the memory it returns, so clear
+    * the string pointers as well instead of leaving them undefined.
+    */
+   descr[LASTNUM].lexid=0;
+   descr[LASTNUM].alias=NULL;
+   descr[LASTNUM].descr=NULL;
+
+   PG_RETURN_POINTER(descr);
+}
+
+/*
+ * prsd_start: begin parsing a buffer with the default parser.
+ *
+ * Argument 0 is a pointer to the text and argument 1 an int32 passed along
+ * with it to start_parse_str().  Always returns a NULL pointer.
+ */
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+Datum 
+prsd_start(PG_FUNCTION_ARGS) {
+   char  *buf = (char*)PG_GETARG_POINTER(0);
+   int32 buflen = PG_GETARG_INT32(1);
+
+   start_parse_str( buf, buflen );
+   PG_RETURN_POINTER(NULL);
+}
+
+/*
+ * prsd_getlexeme: fetch the next lexeme from the default parser.
+ *
+ * Runs tsearch2_yylex() and then stores the current values of `token` and
+ * `tokenlen` through the pointers passed as arguments 1 and 2.  Argument 0
+ * is not read.  Returns the value produced by tsearch2_yylex() as int32.
+ */
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+Datum 
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+   char **tokptr = (char**)PG_GETARG_POINTER(1);
+   int  *lenptr = (int*)PG_GETARG_POINTER(2);
+   int  lextype;
+
+   lextype = tsearch2_yylex();
+
+   /* hand the lexeme text and its length back to the caller */
+   *tokptr = token;
+   *lenptr = tokenlen;
+
+   PG_RETURN_INT32(lextype);
+}
+
+/*
+ * prsd_end: finish a parse with the default parser by calling end_parse().
+ * Argument 0 is not read.  Returns void.
+ */
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+Datum 
+prsd_end(PG_FUNCTION_ARGS) {
+   /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+   end_parse();
+   PG_RETURN_VOID();
+}
+
+/*
+ * Classes of lexeme types used while building a headline.  Each macro
+ * tests a numeric lexeme type id.
+ * NOTE(review): the ids presumably correspond to the type numbers declared
+ * in wordparser/deflex.h -- confirm there before changing any of them.
+ */
+#define LEAVETOKEN(x)  ( (x)==12 )
+#define COMPLEXTOKEN(x)    ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x)    ( (x)==12 )
+
+
+/* used only as part of NOENDTOKEN below */
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* words of these types get their `replace` flag set in prsd_headline */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* types that prsd_headline does not count as words when measuring length */
+#define NONWORDTOKEN(x)    ( (x)==12 || HLIDIGNORE(x) )
+/* types that prsd_headline avoids as the last word of a headline */
+#define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/*
+ * Window of headline words handed to checkcondition_HL() through
+ * TS_execute(): `len` consecutive entries starting at `words`.
+ */
+typedef struct {
+   HLWORD  *words; /* first word of the window */
+   int len; /* number of words in the window */
+} hlCheck;
+
+/*
+ * checkcondition_HL: callback for TS_execute().
+ *
+ * checkval points to an hlCheck window; returns true when some word in
+ * that window refers to the query item `val`, false otherwise.
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+   hlCheck *window = (hlCheck*)checkval;
+   int idx = 0;
+
+   while ( idx < window->len ) {
+       if ( window->words[idx].item == val )
+           return true;
+       idx++;
+   }
+
+   return false;
+}
+
+
+/*
+ * hlCover: find the next fragment ("cover") of the text that satisfies
+ * the query.
+ *
+ * On entry *p is the word position to start searching from.  On success
+ * the function returns true with *p and *q set to the first and last word
+ * of the cover (both inclusive).  It returns false when no further cover
+ * exists; *p and *q are then not meaningful.
+ *
+ * Steps:
+ *   1. for every VAL item of the query take its first occurrence at or
+ *      after the start position; *q becomes the largest such position;
+ *   2. for every VAL item take its last occurrence inside [start, *q];
+ *      *p becomes the smallest such position;
+ *   3. evaluate the query against words [*p, *q]; if it does not match,
+ *      retry from *p + 1.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+   int i,j;
+   ITEM    *item=GETQUERY(query);
+   int pos=*p;
+   *q=0;
+   *p=0x7fffffff;
+
+   /* step 1: right border */
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=pos;i<prs->curwords;i++) {
+           if ( prs->words[i].item == item ) {
+               if ( i>*q )
+                   *q = i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   /* *q still 0 is treated as "no query item found at or after pos" */
+   if ( *q==0 )
+       return false;
+
+   /* step 2: left border */
+   item=GETQUERY(query);
+   for(j=0;j<query->size;j++) {
+       if ( item->type != VAL ) {
+           item++;
+           continue;
+       }
+       for(i=*q;i>=pos;i--) {
+           if ( prs->words[i].item == item ) {
+               if ( i<*p )
+                   *p=i;
+               break;
+           }
+       }
+       item++;
+   }
+
+   /* step 3: check the query against the candidate window */
+   if ( *p<=*q ) {
+       hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+       if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) {
+           return true;
+       } else {
+           /* window does not satisfy the query: move on by one word */
+           (*p)++;
+           return hlCover(prs,query,p,q);
+       }
+   }
+
+   return false;
+}
+
+/*
+ * prsd_headline: choose which part of a parsed text becomes the headline.
+ *
+ * Arguments (all passed as pointers):
+ *   0 - HLPRSTEXT *prs   parsed text; the per-word flags and the
+ *                        startsel/stopsel fields are filled in here
+ *   1 - text *opt        option string, or NULL for the defaults;
+ *                        recognised keys (case-insensitive): MaxWords,
+ *                        MinWords, ShortWord, StartSel, StopSel
+ *   2 - QUERYTYPE *query query whose items the headline should contain
+ *
+ * Every cover reported by hlCover() is measured (curlen = number of words,
+ * poslen = number of distinct query items) and its end is moved so that
+ * the fragment has between MinWords and MaxWords words and, if possible,
+ * does not end on a NOENDTOKEN type or on a word of ShortWord characters
+ * or fewer.  The best fragment is kept; when the query has no cover at
+ * all, the first MinWords words of the text are used.
+ *
+ * Raises an error when the options violate 0 < MinWords < MaxWords or
+ * ShortWord >= 0.  Returns prs.
+ */
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+Datum 
+prsd_headline(PG_FUNCTION_ARGS) {
+   HLPRSTEXT   *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+   text    *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+   QUERYTYPE   *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+   /* defaults; may be overridden by opt, as may the start and stop tags */
+   int min_words=15;
+   int max_words=35;
+   int shortword=3;
+
+   int p=0,q=0;
+   int bestb=-1,beste=-1;
+   int bestlen=-1;
+   int pose=0, poslen, curlen;
+
+   int i;
+
+   /*config*/
+   prs->startsel=NULL;
+   prs->stopsel=NULL;
+   if ( opt ) {
+       Map *map,*mptr;
+
+       parse_cfgdict(opt,&map);
+       mptr=map;
+
+       while(mptr && mptr->key) {
+           if ( strcasecmp(mptr->key,"MaxWords")==0 )
+               max_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"MinWords")==0 )
+               min_words=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+               shortword=pg_atoi(mptr->value,4,1);
+           else if ( strcasecmp(mptr->key,"StartSel")==0 )
+               prs->startsel=pstrdup(mptr->value);
+           else if ( strcasecmp(mptr->key,"StopSel")==0 )
+               prs->stopsel=pstrdup(mptr->value);
+
+           pfree(mptr->key);
+           pfree(mptr->value);
+
+           mptr++;
+       }
+       pfree(map);
+
+       if ( min_words >= max_words )
+           elog(ERROR,"Must be MinWords < MaxWords");
+       if ( min_words<=0 )
+           elog(ERROR,"Must be MinWords > 0");
+       if ( shortword<0 )
+           elog(ERROR,"Must be ShortWord >= 0");
+   }
+
+   while( hlCover(prs,query,&p,&q) ) {
+       /* find cover len in words */
+       curlen=0;
+       poslen=0;
+       for(i=p;i<=q && curlen < max_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) )
+               curlen++;
+           if ( prs->words[i].item && !prs->words[i].repeated )
+               poslen++;
+           pose=i;
+       }
+
+       if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) {
+           /* best already found, so try one more cover */
+           p++;
+           continue;
+       }
+
+       if ( curlen < max_words ) { /* find good end */
+           for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+               /* word q has already been counted by the loop above */
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) )
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
+                   continue;
+               if ( curlen>=min_words )
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) )
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
+                   continue;
+               break;
+           }
+       }
+
+       /*
+        * Take this fragment when it is the first one, when it holds more
+        * query items and ends on an acceptable word, or when it ends on an
+        * acceptable word while the current best does not.
+        */
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) &&
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       }
+
+       p++;
+   }
+
+   /* no cover at all: fall back to the first min_words words */
+   if ( bestlen<0 ) {
+       curlen=0;
+       poslen=0;
+       for(i=0;i<prs->curwords && curlen<min_words ; i++) {
+           if ( !NONWORDTOKEN(prs->words[i].type) )
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   /* flag the words of the chosen fragment */
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   /* selection markers default to empty strings when not set in opt */
+   if (!prs->startsel)
+       prs->startsel=pstrdup("");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("");
+   prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+
+               if ( i!=q ) {
+                   if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                       curlen++;
+                   if ( prs->words[i].item && !prs->words[i].repeated )
+                       poslen++;
+               }
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               if ( curlen>=min_words )    
+                   break;
+           }
+       } else { /* shorter cover :((( */
+           for(;curlen>min_words;i--) {
+               if ( !NONWORDTOKEN(prs->words[i].type) ) 
+                   curlen--;
+               if ( prs->words[i].item && !prs->words[i].repeated )
+                   poslen--;
+               pose=i;
+               if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) 
+                   continue;
+               break;
+           }
+       }
+
+       if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || 
+               ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type)  || prs->words[pose].len <= shortword) && 
+                   (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+           bestb=p; beste=pose;
+           bestlen=poslen;
+       } 
+
+       p++;
+   }
+
+   if ( bestlen<0 ) {
+       curlen=0;
+       poslen=0;
+       for(i=0;icurwords && curlen
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   if (!prs->startsel)
+       prs->startsel=pstrdup("");

+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("");
+        prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+
+           if ( !NONWORDTOKEN(prs->words[i].type) ) 
+               curlen++;
+           pose=i;
+       }
+       bestb=0; beste=pose;
+   }
+
+   for(i=bestb;i<=beste;i++) {
+       if ( prs->words[i].item )
+           prs->words[i].selected=1;
+       if ( prs->words[i].repeated )
+           prs->words[i].skip=1;
+       if ( HLIDIGNORE(prs->words[i].type) )
+           prs->words[i].replace=1;
+
+       prs->words[i].in=1;
+   }
+
+   if (!prs->startsel)
+       prs->startsel=pstrdup("");
+   if (!prs->stopsel)
+       prs->stopsel=pstrdup("");
+        prs->startsellen=strlen(prs->startsel);
+   prs->stopsellen=strlen(prs->stopsel);
+
+   PG_RETURN_POINTER(prs);
+}
+
author	Teodor Sigaev
	Mon, 21 Jul 2003 10:27:44 +0000 (10:27 +0000)
committer	Teodor Sigaev
	Mon, 21 Jul 2003 10:27:44 +0000 (10:27 +0000)
contrib/tsearch2/Makefile	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/README.tsearch2	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/common.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/common.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/crc32.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/crc32.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/data/test_tsearch.data	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/dict.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/dict.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/dict_ex.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/dict_ispell.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/dict_snowball.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/dict_syn.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/docs/tsearch-V2-intro.html	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/docs/tsearch2-guide.html	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/docs/tsearch2-ref.html	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/expected/tsearch2.out	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/gendict/Makefile.IN	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/gendict/README.gendict	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/gendict/config.sh	[new file with mode: 0755]	patch \| blob
contrib/tsearch2/gendict/dict_snowball.c.IN	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/gendict/dict_tmpl.c.IN	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/gendict/sql.IN	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/gistidx.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/gistidx.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/ispell/spell.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/ispell/spell.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/prs_dcfg.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/query.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/query.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/rank.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/rewrite.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/rewrite.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/snmap.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/snmap.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/snowball/api.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/snowball/api.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/snowball/english_stem.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/snowball/english_stem.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/snowball/header.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/snowball/russian_stem.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/snowball/russian_stem.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/snowball/utilities.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/sql/tsearch2.sql	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/stopword.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/stopword/english.stop	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/stopword/russian.stop	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/ts_cfg.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/ts_cfg.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/ts_stat.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/ts_stat.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/tsearch.sql._in	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/tsvector.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/tsvector.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/tsvector_op.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/untsearch.sql.in	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/wordparser/deflex.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/wordparser/deflex.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/wordparser/parser.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/wordparser/parser.l	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/wparser.c	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/wparser.h	[new file with mode: 0644]	patch \| blob
contrib/tsearch2/wparser_def.c	[new file with mode: 0644]	patch \| blob